{ "best_metric": 0.84, "best_model_checkpoint": "MAE-CT-CPC-Dicotomized-v6-Day1/checkpoint-896", "epoch": 49.00218181818182, "eval_steps": 500, "global_step": 2750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.6363636363636366e-07, "loss": 0.7109, "step": 10 }, { "epoch": 0.01, "learning_rate": 7.272727272727273e-07, "loss": 0.6964, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.090909090909091e-06, "loss": 0.6618, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.4545454545454546e-06, "loss": 0.6946, "step": 40 }, { "epoch": 0.02, "learning_rate": 1.8181818181818183e-06, "loss": 0.638, "step": 50 }, { "epoch": 0.02, "eval_accuracy": 0.64, "eval_loss": 0.6488345265388489, "eval_runtime": 5.4784, "eval_samples_per_second": 4.563, "eval_steps_per_second": 1.278, "step": 56 }, { "epoch": 1.0, "learning_rate": 2.181818181818182e-06, "loss": 0.6898, "step": 60 }, { "epoch": 1.01, "learning_rate": 2.5454545454545456e-06, "loss": 0.6249, "step": 70 }, { "epoch": 1.01, "learning_rate": 2.9090909090909093e-06, "loss": 0.5873, "step": 80 }, { "epoch": 1.01, "learning_rate": 3.272727272727273e-06, "loss": 0.6824, "step": 90 }, { "epoch": 1.02, "learning_rate": 3.6363636363636366e-06, "loss": 0.5355, "step": 100 }, { "epoch": 1.02, "learning_rate": 4.000000000000001e-06, "loss": 0.7032, "step": 110 }, { "epoch": 1.02, "eval_accuracy": 0.64, "eval_loss": 0.6394311785697937, "eval_runtime": 5.0835, "eval_samples_per_second": 4.918, "eval_steps_per_second": 1.377, "step": 112 }, { "epoch": 2.0, "learning_rate": 4.363636363636364e-06, "loss": 0.6825, "step": 120 }, { "epoch": 2.01, "learning_rate": 4.727272727272728e-06, "loss": 0.5733, "step": 130 }, { "epoch": 2.01, "learning_rate": 5.090909090909091e-06, "loss": 0.6115, "step": 140 }, { "epoch": 2.01, "learning_rate": 5.4545454545454545e-06, "loss": 0.5805, "step": 150 }, { "epoch": 2.02, "learning_rate": 5.8181818181818185e-06, "loss": 0.6243, "step": 160 }, { "epoch": 2.02, "eval_accuracy": 0.64, "eval_loss": 0.5943312048912048, "eval_runtime": 5.0387, "eval_samples_per_second": 4.962, "eval_steps_per_second": 1.389, "step": 168 }, { "epoch": 3.0, "learning_rate": 6.181818181818182e-06, "loss": 0.6231, "step": 170 }, { "epoch": 3.0, "learning_rate": 6.545454545454546e-06, "loss": 0.5273, "step": 180 }, { "epoch": 3.01, "learning_rate": 6.90909090909091e-06, "loss": 0.4817, "step": 190 }, { "epoch": 3.01, "learning_rate": 7.272727272727273e-06, "loss": 0.647, "step": 200 }, { "epoch": 3.02, "learning_rate": 7.636363636363638e-06, "loss": 0.5636, "step": 210 }, { "epoch": 3.02, "learning_rate": 8.000000000000001e-06, "loss": 0.6339, "step": 220 }, { "epoch": 3.02, "eval_accuracy": 0.6, "eval_loss": 0.6504328846931458, "eval_runtime": 4.8576, "eval_samples_per_second": 5.147, "eval_steps_per_second": 1.441, "step": 224 }, { "epoch": 4.0, "learning_rate": 8.363636363636365e-06, "loss": 0.5425, "step": 230 }, { "epoch": 4.01, "learning_rate": 8.727272727272728e-06, "loss": 0.5086, "step": 240 }, { "epoch": 4.01, "learning_rate": 9.090909090909091e-06, "loss": 0.5566, "step": 250 }, { "epoch": 4.01, "learning_rate": 9.454545454545456e-06, "loss": 0.7499, "step": 260 }, { "epoch": 4.02, "learning_rate": 9.81818181818182e-06, "loss": 0.524, "step": 270 }, { "epoch": 4.02, "learning_rate": 9.97979797979798e-06, "loss": 0.6566, "step": 280 }, { "epoch": 4.02, "eval_accuracy": 0.64, "eval_loss": 0.6404885649681091, "eval_runtime": 4.8837, "eval_samples_per_second": 5.119, "eval_steps_per_second": 1.433, "step": 280 }, { "epoch": 5.0, "learning_rate": 9.939393939393939e-06, "loss": 0.5201, "step": 290 }, { "epoch": 5.01, "learning_rate": 9.8989898989899e-06, "loss": 0.6411, "step": 300 }, { "epoch": 5.01, "learning_rate": 9.85858585858586e-06, "loss": 0.4865, "step": 310 }, { "epoch": 5.01, "learning_rate": 9.81818181818182e-06, "loss": 0.5733, "step": 320 }, { "epoch": 5.02, "learning_rate": 9.777777777777779e-06, "loss": 0.4056, "step": 330 }, { "epoch": 5.02, "eval_accuracy": 0.64, "eval_loss": 0.6274449825286865, "eval_runtime": 5.0813, "eval_samples_per_second": 4.92, "eval_steps_per_second": 1.378, "step": 336 }, { "epoch": 6.0, "learning_rate": 9.737373737373738e-06, "loss": 0.7089, "step": 340 }, { "epoch": 6.01, "learning_rate": 9.696969696969698e-06, "loss": 0.5468, "step": 350 }, { "epoch": 6.01, "learning_rate": 9.656565656565657e-06, "loss": 0.5937, "step": 360 }, { "epoch": 6.01, "learning_rate": 9.616161616161616e-06, "loss": 0.4663, "step": 370 }, { "epoch": 6.02, "learning_rate": 9.575757575757576e-06, "loss": 0.5177, "step": 380 }, { "epoch": 6.02, "learning_rate": 9.535353535353537e-06, "loss": 0.3523, "step": 390 }, { "epoch": 6.02, "eval_accuracy": 0.64, "eval_loss": 0.9109216928482056, "eval_runtime": 5.0266, "eval_samples_per_second": 4.974, "eval_steps_per_second": 1.393, "step": 392 }, { "epoch": 7.0, "learning_rate": 9.494949494949497e-06, "loss": 0.6953, "step": 400 }, { "epoch": 7.01, "learning_rate": 9.454545454545456e-06, "loss": 0.4354, "step": 410 }, { "epoch": 7.01, "learning_rate": 9.414141414141414e-06, "loss": 0.3926, "step": 420 }, { "epoch": 7.01, "learning_rate": 9.373737373737375e-06, "loss": 0.4058, "step": 430 }, { "epoch": 7.02, "learning_rate": 9.333333333333334e-06, "loss": 0.4581, "step": 440 }, { "epoch": 7.02, "eval_accuracy": 0.8, "eval_loss": 0.4675277769565582, "eval_runtime": 5.0375, "eval_samples_per_second": 4.963, "eval_steps_per_second": 1.39, "step": 448 }, { "epoch": 8.0, "learning_rate": 9.292929292929294e-06, "loss": 0.597, "step": 450 }, { "epoch": 8.0, "learning_rate": 9.252525252525253e-06, "loss": 0.4385, "step": 460 }, { "epoch": 8.01, "learning_rate": 9.212121212121213e-06, "loss": 0.687, "step": 470 }, { "epoch": 8.01, "learning_rate": 9.171717171717172e-06, "loss": 0.6669, "step": 480 }, { "epoch": 8.02, "learning_rate": 9.131313131313132e-06, "loss": 0.5107, "step": 490 }, { "epoch": 8.02, "learning_rate": 9.090909090909091e-06, "loss": 0.3648, "step": 500 }, { "epoch": 8.02, "eval_accuracy": 0.76, "eval_loss": 0.5780391693115234, "eval_runtime": 4.9069, "eval_samples_per_second": 5.095, "eval_steps_per_second": 1.427, "step": 504 }, { "epoch": 9.0, "learning_rate": 9.050505050505052e-06, "loss": 0.2305, "step": 510 }, { "epoch": 9.01, "learning_rate": 9.010101010101012e-06, "loss": 0.2993, "step": 520 }, { "epoch": 9.01, "learning_rate": 8.969696969696971e-06, "loss": 0.4265, "step": 530 }, { "epoch": 9.01, "learning_rate": 8.92929292929293e-06, "loss": 0.6946, "step": 540 }, { "epoch": 9.02, "learning_rate": 8.888888888888888e-06, "loss": 0.4191, "step": 550 }, { "epoch": 9.02, "learning_rate": 8.84848484848485e-06, "loss": 0.4622, "step": 560 }, { "epoch": 9.02, "eval_accuracy": 0.64, "eval_loss": 1.1120274066925049, "eval_runtime": 4.935, "eval_samples_per_second": 5.066, "eval_steps_per_second": 1.418, "step": 560 }, { "epoch": 10.0, "learning_rate": 8.808080808080809e-06, "loss": 0.2502, "step": 570 }, { "epoch": 10.01, "learning_rate": 8.767676767676768e-06, "loss": 0.2656, "step": 580 }, { "epoch": 10.01, "learning_rate": 8.727272727272728e-06, "loss": 0.3627, "step": 590 }, { "epoch": 10.01, "learning_rate": 8.686868686868687e-06, "loss": 0.4858, "step": 600 }, { "epoch": 10.02, "learning_rate": 8.646464646464647e-06, "loss": 0.5836, "step": 610 }, { "epoch": 10.02, "eval_accuracy": 0.76, "eval_loss": 0.6369267106056213, "eval_runtime": 4.9248, "eval_samples_per_second": 5.076, "eval_steps_per_second": 1.421, "step": 616 }, { "epoch": 11.0, "learning_rate": 8.606060606060606e-06, "loss": 0.3714, "step": 620 }, { "epoch": 11.01, "learning_rate": 8.565656565656566e-06, "loss": 0.4537, "step": 630 }, { "epoch": 11.01, "learning_rate": 8.525252525252527e-06, "loss": 0.1579, "step": 640 }, { "epoch": 11.01, "learning_rate": 8.484848484848486e-06, "loss": 0.2635, "step": 650 }, { "epoch": 11.02, "learning_rate": 8.444444444444446e-06, "loss": 0.7146, "step": 660 }, { "epoch": 11.02, "learning_rate": 8.404040404040405e-06, "loss": 0.316, "step": 670 }, { "epoch": 11.02, "eval_accuracy": 0.64, "eval_loss": 0.8768600225448608, "eval_runtime": 4.9305, "eval_samples_per_second": 5.07, "eval_steps_per_second": 1.42, "step": 672 }, { "epoch": 12.0, "learning_rate": 8.363636363636365e-06, "loss": 0.2406, "step": 680 }, { "epoch": 12.01, "learning_rate": 8.323232323232324e-06, "loss": 0.0525, "step": 690 }, { "epoch": 12.01, "learning_rate": 8.282828282828283e-06, "loss": 0.5016, "step": 700 }, { "epoch": 12.01, "learning_rate": 8.242424242424243e-06, "loss": 0.4659, "step": 710 }, { "epoch": 12.02, "learning_rate": 8.202020202020202e-06, "loss": 0.135, "step": 720 }, { "epoch": 12.02, "eval_accuracy": 0.72, "eval_loss": 0.6826868653297424, "eval_runtime": 4.9197, "eval_samples_per_second": 5.082, "eval_steps_per_second": 1.423, "step": 728 }, { "epoch": 13.0, "learning_rate": 8.161616161616162e-06, "loss": 0.181, "step": 730 }, { "epoch": 13.0, "learning_rate": 8.121212121212121e-06, "loss": 0.2238, "step": 740 }, { "epoch": 13.01, "learning_rate": 8.08080808080808e-06, "loss": 0.2347, "step": 750 }, { "epoch": 13.01, "learning_rate": 8.04040404040404e-06, "loss": 0.2645, "step": 760 }, { "epoch": 13.02, "learning_rate": 8.000000000000001e-06, "loss": 0.487, "step": 770 }, { "epoch": 13.02, "learning_rate": 7.95959595959596e-06, "loss": 0.0817, "step": 780 }, { "epoch": 13.02, "eval_accuracy": 0.64, "eval_loss": 0.9666504859924316, "eval_runtime": 5.0166, "eval_samples_per_second": 4.983, "eval_steps_per_second": 1.395, "step": 784 }, { "epoch": 14.0, "learning_rate": 7.91919191919192e-06, "loss": 0.1581, "step": 790 }, { "epoch": 14.01, "learning_rate": 7.87878787878788e-06, "loss": 0.1313, "step": 800 }, { "epoch": 14.01, "learning_rate": 7.838383838383839e-06, "loss": 0.1097, "step": 810 }, { "epoch": 14.01, "learning_rate": 7.797979797979799e-06, "loss": 0.108, "step": 820 }, { "epoch": 14.02, "learning_rate": 7.757575757575758e-06, "loss": 0.0504, "step": 830 }, { "epoch": 14.02, "learning_rate": 7.717171717171717e-06, "loss": 0.5254, "step": 840 }, { "epoch": 14.02, "eval_accuracy": 0.76, "eval_loss": 0.7441825866699219, "eval_runtime": 4.9503, "eval_samples_per_second": 5.05, "eval_steps_per_second": 1.414, "step": 840 }, { "epoch": 15.0, "learning_rate": 7.676767676767677e-06, "loss": 0.1904, "step": 850 }, { "epoch": 15.01, "learning_rate": 7.636363636363638e-06, "loss": 0.2687, "step": 860 }, { "epoch": 15.01, "learning_rate": 7.595959595959597e-06, "loss": 0.2626, "step": 870 }, { "epoch": 15.01, "learning_rate": 7.555555555555556e-06, "loss": 0.5363, "step": 880 }, { "epoch": 15.02, "learning_rate": 7.515151515151516e-06, "loss": 0.2692, "step": 890 }, { "epoch": 15.02, "eval_accuracy": 0.84, "eval_loss": 0.5944011211395264, "eval_runtime": 4.7556, "eval_samples_per_second": 5.257, "eval_steps_per_second": 1.472, "step": 896 }, { "epoch": 16.0, "learning_rate": 7.474747474747476e-06, "loss": 0.0987, "step": 900 }, { "epoch": 16.01, "learning_rate": 7.434343434343435e-06, "loss": 0.0755, "step": 910 }, { "epoch": 16.01, "learning_rate": 7.393939393939395e-06, "loss": 0.1298, "step": 920 }, { "epoch": 16.01, "learning_rate": 7.353535353535353e-06, "loss": 0.6588, "step": 930 }, { "epoch": 16.02, "learning_rate": 7.3131313131313146e-06, "loss": 0.0705, "step": 940 }, { "epoch": 16.02, "learning_rate": 7.272727272727273e-06, "loss": 0.0177, "step": 950 }, { "epoch": 16.02, "eval_accuracy": 0.76, "eval_loss": 1.0163482427597046, "eval_runtime": 4.846, "eval_samples_per_second": 5.159, "eval_steps_per_second": 1.444, "step": 952 }, { "epoch": 17.0, "learning_rate": 7.232323232323233e-06, "loss": 0.0157, "step": 960 }, { "epoch": 17.01, "learning_rate": 7.191919191919192e-06, "loss": 0.1494, "step": 970 }, { "epoch": 17.01, "learning_rate": 7.151515151515152e-06, "loss": 0.2343, "step": 980 }, { "epoch": 17.01, "learning_rate": 7.111111111111112e-06, "loss": 0.0901, "step": 990 }, { "epoch": 17.02, "learning_rate": 7.070707070707071e-06, "loss": 0.0386, "step": 1000 }, { "epoch": 17.02, "eval_accuracy": 0.76, "eval_loss": 0.8789149522781372, "eval_runtime": 4.9461, "eval_samples_per_second": 5.055, "eval_steps_per_second": 1.415, "step": 1008 }, { "epoch": 18.0, "learning_rate": 7.030303030303031e-06, "loss": 0.0011, "step": 1010 }, { "epoch": 18.0, "learning_rate": 6.98989898989899e-06, "loss": 0.1129, "step": 1020 }, { "epoch": 18.01, "learning_rate": 6.9494949494949505e-06, "loss": 0.0008, "step": 1030 }, { "epoch": 18.01, "learning_rate": 6.90909090909091e-06, "loss": 0.0014, "step": 1040 }, { "epoch": 18.02, "learning_rate": 6.868686868686869e-06, "loss": 0.0303, "step": 1050 }, { "epoch": 18.02, "learning_rate": 6.828282828282828e-06, "loss": 0.2142, "step": 1060 }, { "epoch": 18.02, "eval_accuracy": 0.68, "eval_loss": 1.0579525232315063, "eval_runtime": 4.9373, "eval_samples_per_second": 5.064, "eval_steps_per_second": 1.418, "step": 1064 }, { "epoch": 19.0, "learning_rate": 6.787878787878789e-06, "loss": 0.0012, "step": 1070 }, { "epoch": 19.01, "learning_rate": 6.747474747474749e-06, "loss": 0.005, "step": 1080 }, { "epoch": 19.01, "learning_rate": 6.707070707070707e-06, "loss": 0.0008, "step": 1090 }, { "epoch": 19.01, "learning_rate": 6.666666666666667e-06, "loss": 0.1648, "step": 1100 }, { "epoch": 19.02, "learning_rate": 6.626262626262627e-06, "loss": 0.0897, "step": 1110 }, { "epoch": 19.02, "learning_rate": 6.585858585858586e-06, "loss": 0.0653, "step": 1120 }, { "epoch": 19.02, "eval_accuracy": 0.72, "eval_loss": 0.9189437627792358, "eval_runtime": 4.9929, "eval_samples_per_second": 5.007, "eval_steps_per_second": 1.402, "step": 1120 }, { "epoch": 20.0, "learning_rate": 6.545454545454546e-06, "loss": 0.0044, "step": 1130 }, { "epoch": 20.01, "learning_rate": 6.505050505050505e-06, "loss": 0.0012, "step": 1140 }, { "epoch": 20.01, "learning_rate": 6.464646464646466e-06, "loss": 0.006, "step": 1150 }, { "epoch": 20.01, "learning_rate": 6.424242424242425e-06, "loss": 0.0005, "step": 1160 }, { "epoch": 20.02, "learning_rate": 6.3838383838383845e-06, "loss": 0.0004, "step": 1170 }, { "epoch": 20.02, "eval_accuracy": 0.76, "eval_loss": 1.1912809610366821, "eval_runtime": 5.1317, "eval_samples_per_second": 4.872, "eval_steps_per_second": 1.364, "step": 1176 }, { "epoch": 21.0, "learning_rate": 6.343434343434344e-06, "loss": 0.0004, "step": 1180 }, { "epoch": 21.01, "learning_rate": 6.303030303030303e-06, "loss": 0.0024, "step": 1190 }, { "epoch": 21.01, "learning_rate": 6.262626262626264e-06, "loss": 0.0345, "step": 1200 }, { "epoch": 21.01, "learning_rate": 6.222222222222223e-06, "loss": 0.0003, "step": 1210 }, { "epoch": 21.02, "learning_rate": 6.181818181818182e-06, "loss": 0.0003, "step": 1220 }, { "epoch": 21.02, "learning_rate": 6.141414141414141e-06, "loss": 0.0006, "step": 1230 }, { "epoch": 21.02, "eval_accuracy": 0.72, "eval_loss": 1.1668070554733276, "eval_runtime": 5.1021, "eval_samples_per_second": 4.9, "eval_steps_per_second": 1.372, "step": 1232 }, { "epoch": 22.0, "learning_rate": 6.1010101010101015e-06, "loss": 0.0356, "step": 1240 }, { "epoch": 22.01, "learning_rate": 6.060606060606061e-06, "loss": 0.0005, "step": 1250 }, { "epoch": 22.01, "learning_rate": 6.0202020202020204e-06, "loss": 0.0013, "step": 1260 }, { "epoch": 22.01, "learning_rate": 5.97979797979798e-06, "loss": 0.1359, "step": 1270 }, { "epoch": 22.02, "learning_rate": 5.93939393939394e-06, "loss": 0.0006, "step": 1280 }, { "epoch": 22.02, "eval_accuracy": 0.76, "eval_loss": 1.2781660556793213, "eval_runtime": 5.0836, "eval_samples_per_second": 4.918, "eval_steps_per_second": 1.377, "step": 1288 }, { "epoch": 23.0, "learning_rate": 5.8989898989899e-06, "loss": 0.0003, "step": 1290 }, { "epoch": 23.0, "learning_rate": 5.858585858585859e-06, "loss": 0.0005, "step": 1300 }, { "epoch": 23.01, "learning_rate": 5.8181818181818185e-06, "loss": 0.0015, "step": 1310 }, { "epoch": 23.01, "learning_rate": 5.777777777777778e-06, "loss": 0.0003, "step": 1320 }, { "epoch": 23.02, "learning_rate": 5.737373737373738e-06, "loss": 0.0004, "step": 1330 }, { "epoch": 23.02, "learning_rate": 5.696969696969698e-06, "loss": 0.0003, "step": 1340 }, { "epoch": 23.02, "eval_accuracy": 0.76, "eval_loss": 1.259096622467041, "eval_runtime": 5.0958, "eval_samples_per_second": 4.906, "eval_steps_per_second": 1.374, "step": 1344 }, { "epoch": 24.0, "learning_rate": 5.656565656565657e-06, "loss": 0.0002, "step": 1350 }, { "epoch": 24.01, "learning_rate": 5.616161616161616e-06, "loss": 0.0002, "step": 1360 }, { "epoch": 24.01, "learning_rate": 5.575757575757577e-06, "loss": 0.0002, "step": 1370 }, { "epoch": 24.01, "learning_rate": 5.5353535353535355e-06, "loss": 0.0026, "step": 1380 }, { "epoch": 24.02, "learning_rate": 5.494949494949495e-06, "loss": 0.1444, "step": 1390 }, { "epoch": 24.02, "learning_rate": 5.4545454545454545e-06, "loss": 0.0004, "step": 1400 }, { "epoch": 24.02, "eval_accuracy": 0.72, "eval_loss": 1.5767698287963867, "eval_runtime": 5.2043, "eval_samples_per_second": 4.804, "eval_steps_per_second": 1.345, "step": 1400 }, { "epoch": 25.0, "learning_rate": 5.414141414141415e-06, "loss": 0.3487, "step": 1410 }, { "epoch": 25.01, "learning_rate": 5.373737373737374e-06, "loss": 0.0664, "step": 1420 }, { "epoch": 25.01, "learning_rate": 5.333333333333334e-06, "loss": 0.0134, "step": 1430 }, { "epoch": 25.01, "learning_rate": 5.292929292929293e-06, "loss": 0.0905, "step": 1440 }, { "epoch": 25.02, "learning_rate": 5.252525252525253e-06, "loss": 0.0431, "step": 1450 }, { "epoch": 25.02, "eval_accuracy": 0.8, "eval_loss": 1.1631672382354736, "eval_runtime": 5.2489, "eval_samples_per_second": 4.763, "eval_steps_per_second": 1.334, "step": 1456 }, { "epoch": 26.0, "learning_rate": 5.212121212121213e-06, "loss": 0.0004, "step": 1460 }, { "epoch": 26.01, "learning_rate": 5.171717171717172e-06, "loss": 0.0004, "step": 1470 }, { "epoch": 26.01, "learning_rate": 5.131313131313132e-06, "loss": 0.0002, "step": 1480 }, { "epoch": 26.01, "learning_rate": 5.090909090909091e-06, "loss": 0.1211, "step": 1490 }, { "epoch": 26.02, "learning_rate": 5.0505050505050515e-06, "loss": 0.0002, "step": 1500 }, { "epoch": 26.02, "learning_rate": 5.010101010101011e-06, "loss": 0.008, "step": 1510 }, { "epoch": 26.02, "eval_accuracy": 0.76, "eval_loss": 1.4113043546676636, "eval_runtime": 5.0102, "eval_samples_per_second": 4.99, "eval_steps_per_second": 1.397, "step": 1512 }, { "epoch": 27.0, "learning_rate": 4.9696969696969696e-06, "loss": 0.0002, "step": 1520 }, { "epoch": 27.01, "learning_rate": 4.92929292929293e-06, "loss": 0.0003, "step": 1530 }, { "epoch": 27.01, "learning_rate": 4.888888888888889e-06, "loss": 0.0002, "step": 1540 }, { "epoch": 27.01, "learning_rate": 4.848484848484849e-06, "loss": 0.0001, "step": 1550 }, { "epoch": 27.02, "learning_rate": 4.808080808080808e-06, "loss": 0.0003, "step": 1560 }, { "epoch": 27.02, "eval_accuracy": 0.76, "eval_loss": 1.2238938808441162, "eval_runtime": 4.9902, "eval_samples_per_second": 5.01, "eval_steps_per_second": 1.403, "step": 1568 }, { "epoch": 28.0, "learning_rate": 4.7676767676767685e-06, "loss": 0.0001, "step": 1570 }, { "epoch": 28.0, "learning_rate": 4.727272727272728e-06, "loss": 0.0001, "step": 1580 }, { "epoch": 28.01, "learning_rate": 4.6868686868686874e-06, "loss": 0.0002, "step": 1590 }, { "epoch": 28.01, "learning_rate": 4.646464646464647e-06, "loss": 0.0193, "step": 1600 }, { "epoch": 28.02, "learning_rate": 4.606060606060606e-06, "loss": 0.0072, "step": 1610 }, { "epoch": 28.02, "learning_rate": 4.565656565656566e-06, "loss": 0.003, "step": 1620 }, { "epoch": 28.02, "eval_accuracy": 0.72, "eval_loss": 1.719529151916504, "eval_runtime": 5.0061, "eval_samples_per_second": 4.994, "eval_steps_per_second": 1.398, "step": 1624 }, { "epoch": 29.0, "learning_rate": 4.525252525252526e-06, "loss": 0.0002, "step": 1630 }, { "epoch": 29.01, "learning_rate": 4.4848484848484855e-06, "loss": 0.0001, "step": 1640 }, { "epoch": 29.01, "learning_rate": 4.444444444444444e-06, "loss": 0.0862, "step": 1650 }, { "epoch": 29.01, "learning_rate": 4.4040404040404044e-06, "loss": 0.1493, "step": 1660 }, { "epoch": 29.02, "learning_rate": 4.363636363636364e-06, "loss": 0.2325, "step": 1670 }, { "epoch": 29.02, "learning_rate": 4.323232323232323e-06, "loss": 0.3129, "step": 1680 }, { "epoch": 29.02, "eval_accuracy": 0.68, "eval_loss": 1.9160839319229126, "eval_runtime": 5.0112, "eval_samples_per_second": 4.989, "eval_steps_per_second": 1.397, "step": 1680 }, { "epoch": 30.0, "learning_rate": 4.282828282828283e-06, "loss": 0.1379, "step": 1690 }, { "epoch": 30.01, "learning_rate": 4.242424242424243e-06, "loss": 0.0002, "step": 1700 }, { "epoch": 30.01, "learning_rate": 4.2020202020202026e-06, "loss": 0.0002, "step": 1710 }, { "epoch": 30.01, "learning_rate": 4.161616161616162e-06, "loss": 0.023, "step": 1720 }, { "epoch": 30.02, "learning_rate": 4.1212121212121215e-06, "loss": 0.0001, "step": 1730 }, { "epoch": 30.02, "eval_accuracy": 0.76, "eval_loss": 1.4176759719848633, "eval_runtime": 5.0603, "eval_samples_per_second": 4.94, "eval_steps_per_second": 1.383, "step": 1736 }, { "epoch": 31.0, "learning_rate": 4.080808080808081e-06, "loss": 0.0001, "step": 1740 }, { "epoch": 31.01, "learning_rate": 4.04040404040404e-06, "loss": 0.0001, "step": 1750 }, { "epoch": 31.01, "learning_rate": 4.000000000000001e-06, "loss": 0.0002, "step": 1760 }, { "epoch": 31.01, "learning_rate": 3.95959595959596e-06, "loss": 0.0818, "step": 1770 }, { "epoch": 31.02, "learning_rate": 3.9191919191919196e-06, "loss": 0.0002, "step": 1780 }, { "epoch": 31.02, "learning_rate": 3.878787878787879e-06, "loss": 0.0001, "step": 1790 }, { "epoch": 31.02, "eval_accuracy": 0.68, "eval_loss": 1.468752145767212, "eval_runtime": 5.0305, "eval_samples_per_second": 4.97, "eval_steps_per_second": 1.391, "step": 1792 }, { "epoch": 32.0, "learning_rate": 3.8383838383838385e-06, "loss": 0.0002, "step": 1800 }, { "epoch": 32.01, "learning_rate": 3.7979797979797984e-06, "loss": 0.0003, "step": 1810 }, { "epoch": 32.01, "learning_rate": 3.757575757575758e-06, "loss": 0.0004, "step": 1820 }, { "epoch": 32.01, "learning_rate": 3.7171717171717177e-06, "loss": 0.0001, "step": 1830 }, { "epoch": 32.02, "learning_rate": 3.6767676767676767e-06, "loss": 0.0314, "step": 1840 }, { "epoch": 32.02, "eval_accuracy": 0.68, "eval_loss": 1.4025585651397705, "eval_runtime": 5.016, "eval_samples_per_second": 4.984, "eval_steps_per_second": 1.396, "step": 1848 }, { "epoch": 33.0, "learning_rate": 3.6363636363636366e-06, "loss": 0.0591, "step": 1850 }, { "epoch": 33.0, "learning_rate": 3.595959595959596e-06, "loss": 0.0852, "step": 1860 }, { "epoch": 33.01, "learning_rate": 3.555555555555556e-06, "loss": 0.0007, "step": 1870 }, { "epoch": 33.01, "learning_rate": 3.5151515151515154e-06, "loss": 0.0001, "step": 1880 }, { "epoch": 33.02, "learning_rate": 3.4747474747474752e-06, "loss": 0.0012, "step": 1890 }, { "epoch": 33.02, "learning_rate": 3.4343434343434347e-06, "loss": 0.0001, "step": 1900 }, { "epoch": 33.02, "eval_accuracy": 0.72, "eval_loss": 1.584563136100769, "eval_runtime": 5.0691, "eval_samples_per_second": 4.932, "eval_steps_per_second": 1.381, "step": 1904 }, { "epoch": 34.0, "learning_rate": 3.3939393939393946e-06, "loss": 0.0001, "step": 1910 }, { "epoch": 34.01, "learning_rate": 3.3535353535353536e-06, "loss": 0.0001, "step": 1920 }, { "epoch": 34.01, "learning_rate": 3.3131313131313135e-06, "loss": 0.0001, "step": 1930 }, { "epoch": 34.01, "learning_rate": 3.272727272727273e-06, "loss": 0.0002, "step": 1940 }, { "epoch": 34.02, "learning_rate": 3.232323232323233e-06, "loss": 0.0024, "step": 1950 }, { "epoch": 34.02, "learning_rate": 3.1919191919191923e-06, "loss": 0.0001, "step": 1960 }, { "epoch": 34.02, "eval_accuracy": 0.64, "eval_loss": 1.4020888805389404, "eval_runtime": 5.0416, "eval_samples_per_second": 4.959, "eval_steps_per_second": 1.388, "step": 1960 }, { "epoch": 35.0, "learning_rate": 3.1515151515151517e-06, "loss": 0.0132, "step": 1970 }, { "epoch": 35.01, "learning_rate": 3.1111111111111116e-06, "loss": 0.0001, "step": 1980 }, { "epoch": 35.01, "learning_rate": 3.0707070707070706e-06, "loss": 0.0001, "step": 1990 }, { "epoch": 35.01, "learning_rate": 3.0303030303030305e-06, "loss": 0.0002, "step": 2000 }, { "epoch": 35.02, "learning_rate": 2.98989898989899e-06, "loss": 0.0002, "step": 2010 }, { "epoch": 35.02, "eval_accuracy": 0.72, "eval_loss": 1.5993890762329102, "eval_runtime": 5.078, "eval_samples_per_second": 4.923, "eval_steps_per_second": 1.379, "step": 2016 }, { "epoch": 36.0, "learning_rate": 2.94949494949495e-06, "loss": 0.0003, "step": 2020 }, { "epoch": 36.01, "learning_rate": 2.9090909090909093e-06, "loss": 0.0001, "step": 2030 }, { "epoch": 36.01, "learning_rate": 2.868686868686869e-06, "loss": 0.0001, "step": 2040 }, { "epoch": 36.01, "learning_rate": 2.8282828282828286e-06, "loss": 0.0001, "step": 2050 }, { "epoch": 36.02, "learning_rate": 2.7878787878787885e-06, "loss": 0.0001, "step": 2060 }, { "epoch": 36.02, "learning_rate": 2.7474747474747475e-06, "loss": 0.0001, "step": 2070 }, { "epoch": 36.02, "eval_accuracy": 0.72, "eval_loss": 1.4026561975479126, "eval_runtime": 4.9911, "eval_samples_per_second": 5.009, "eval_steps_per_second": 1.402, "step": 2072 }, { "epoch": 37.0, "learning_rate": 2.7070707070707074e-06, "loss": 0.0004, "step": 2080 }, { "epoch": 37.01, "learning_rate": 2.666666666666667e-06, "loss": 0.0036, "step": 2090 }, { "epoch": 37.01, "learning_rate": 2.6262626262626267e-06, "loss": 0.0001, "step": 2100 }, { "epoch": 37.01, "learning_rate": 2.585858585858586e-06, "loss": 0.0001, "step": 2110 }, { "epoch": 37.02, "learning_rate": 2.5454545454545456e-06, "loss": 0.0002, "step": 2120 }, { "epoch": 37.02, "eval_accuracy": 0.76, "eval_loss": 1.360788106918335, "eval_runtime": 4.9882, "eval_samples_per_second": 5.012, "eval_steps_per_second": 1.403, "step": 2128 }, { "epoch": 38.0, "learning_rate": 2.5050505050505055e-06, "loss": 0.0007, "step": 2130 }, { "epoch": 38.0, "learning_rate": 2.464646464646465e-06, "loss": 0.0001, "step": 2140 }, { "epoch": 38.01, "learning_rate": 2.4242424242424244e-06, "loss": 0.0002, "step": 2150 }, { "epoch": 38.01, "learning_rate": 2.3838383838383843e-06, "loss": 0.0001, "step": 2160 }, { "epoch": 38.02, "learning_rate": 2.3434343434343437e-06, "loss": 0.0001, "step": 2170 }, { "epoch": 38.02, "learning_rate": 2.303030303030303e-06, "loss": 0.0001, "step": 2180 }, { "epoch": 38.02, "eval_accuracy": 0.8, "eval_loss": 1.356919288635254, "eval_runtime": 4.9514, "eval_samples_per_second": 5.049, "eval_steps_per_second": 1.414, "step": 2184 }, { "epoch": 39.0, "learning_rate": 2.262626262626263e-06, "loss": 0.0007, "step": 2190 }, { "epoch": 39.01, "learning_rate": 2.222222222222222e-06, "loss": 0.0001, "step": 2200 }, { "epoch": 39.01, "learning_rate": 2.181818181818182e-06, "loss": 0.002, "step": 2210 }, { "epoch": 39.01, "learning_rate": 2.1414141414141414e-06, "loss": 0.0001, "step": 2220 }, { "epoch": 39.02, "learning_rate": 2.1010101010101013e-06, "loss": 0.0001, "step": 2230 }, { "epoch": 39.02, "learning_rate": 2.0606060606060607e-06, "loss": 0.0001, "step": 2240 }, { "epoch": 39.02, "eval_accuracy": 0.8, "eval_loss": 1.4026089906692505, "eval_runtime": 4.9521, "eval_samples_per_second": 5.048, "eval_steps_per_second": 1.414, "step": 2240 }, { "epoch": 40.0, "learning_rate": 2.02020202020202e-06, "loss": 0.0001, "step": 2250 }, { "epoch": 40.01, "learning_rate": 1.97979797979798e-06, "loss": 0.0001, "step": 2260 }, { "epoch": 40.01, "learning_rate": 1.9393939393939395e-06, "loss": 0.0001, "step": 2270 }, { "epoch": 40.01, "learning_rate": 1.8989898989898992e-06, "loss": 0.0002, "step": 2280 }, { "epoch": 40.02, "learning_rate": 1.8585858585858588e-06, "loss": 0.0001, "step": 2290 }, { "epoch": 40.02, "eval_accuracy": 0.72, "eval_loss": 1.4563390016555786, "eval_runtime": 4.9991, "eval_samples_per_second": 5.001, "eval_steps_per_second": 1.4, "step": 2296 }, { "epoch": 41.0, "learning_rate": 1.8181818181818183e-06, "loss": 0.0001, "step": 2300 }, { "epoch": 41.01, "learning_rate": 1.777777777777778e-06, "loss": 0.0001, "step": 2310 }, { "epoch": 41.01, "learning_rate": 1.7373737373737376e-06, "loss": 0.0001, "step": 2320 }, { "epoch": 41.01, "learning_rate": 1.6969696969696973e-06, "loss": 0.0019, "step": 2330 }, { "epoch": 41.02, "learning_rate": 1.6565656565656567e-06, "loss": 0.0001, "step": 2340 }, { "epoch": 41.02, "learning_rate": 1.6161616161616164e-06, "loss": 0.0028, "step": 2350 }, { "epoch": 41.02, "eval_accuracy": 0.76, "eval_loss": 1.3389179706573486, "eval_runtime": 5.702, "eval_samples_per_second": 4.384, "eval_steps_per_second": 1.228, "step": 2352 }, { "epoch": 42.0, "learning_rate": 1.5757575757575759e-06, "loss": 0.0001, "step": 2360 }, { "epoch": 42.01, "learning_rate": 1.5353535353535353e-06, "loss": 0.0001, "step": 2370 }, { "epoch": 42.01, "learning_rate": 1.494949494949495e-06, "loss": 0.0001, "step": 2380 }, { "epoch": 42.01, "learning_rate": 1.4545454545454546e-06, "loss": 0.0001, "step": 2390 }, { "epoch": 42.02, "learning_rate": 1.4141414141414143e-06, "loss": 0.0001, "step": 2400 }, { "epoch": 42.02, "eval_accuracy": 0.8, "eval_loss": 1.3354742527008057, "eval_runtime": 5.6534, "eval_samples_per_second": 4.422, "eval_steps_per_second": 1.238, "step": 2408 }, { "epoch": 43.0, "learning_rate": 1.3737373737373738e-06, "loss": 0.0001, "step": 2410 }, { "epoch": 43.0, "learning_rate": 1.3333333333333334e-06, "loss": 0.0001, "step": 2420 }, { "epoch": 43.01, "learning_rate": 1.292929292929293e-06, "loss": 0.0001, "step": 2430 }, { "epoch": 43.01, "learning_rate": 1.2525252525252527e-06, "loss": 0.0001, "step": 2440 }, { "epoch": 43.02, "learning_rate": 1.2121212121212122e-06, "loss": 0.0001, "step": 2450 }, { "epoch": 43.02, "learning_rate": 1.1717171717171719e-06, "loss": 0.0001, "step": 2460 }, { "epoch": 43.02, "eval_accuracy": 0.8, "eval_loss": 1.3445130586624146, "eval_runtime": 5.7756, "eval_samples_per_second": 4.329, "eval_steps_per_second": 1.212, "step": 2464 }, { "epoch": 44.0, "learning_rate": 1.1313131313131315e-06, "loss": 0.0001, "step": 2470 }, { "epoch": 44.01, "learning_rate": 1.090909090909091e-06, "loss": 0.0001, "step": 2480 }, { "epoch": 44.01, "learning_rate": 1.0505050505050506e-06, "loss": 0.0001, "step": 2490 }, { "epoch": 44.01, "learning_rate": 1.01010101010101e-06, "loss": 0.0001, "step": 2500 }, { "epoch": 44.02, "learning_rate": 9.696969696969698e-07, "loss": 0.0001, "step": 2510 }, { "epoch": 44.02, "learning_rate": 9.292929292929294e-07, "loss": 0.0001, "step": 2520 }, { "epoch": 44.02, "eval_accuracy": 0.8, "eval_loss": 1.352601170539856, "eval_runtime": 5.381, "eval_samples_per_second": 4.646, "eval_steps_per_second": 1.301, "step": 2520 }, { "epoch": 45.0, "learning_rate": 8.88888888888889e-07, "loss": 0.0001, "step": 2530 }, { "epoch": 45.01, "learning_rate": 8.484848484848486e-07, "loss": 0.0001, "step": 2540 }, { "epoch": 45.01, "learning_rate": 8.080808080808082e-07, "loss": 0.0001, "step": 2550 }, { "epoch": 45.01, "learning_rate": 7.676767676767677e-07, "loss": 0.0001, "step": 2560 }, { "epoch": 45.02, "learning_rate": 7.272727272727273e-07, "loss": 0.0001, "step": 2570 }, { "epoch": 45.02, "eval_accuracy": 0.72, "eval_loss": 1.3842326402664185, "eval_runtime": 5.5724, "eval_samples_per_second": 4.486, "eval_steps_per_second": 1.256, "step": 2576 }, { "epoch": 46.0, "learning_rate": 6.868686868686869e-07, "loss": 0.0001, "step": 2580 }, { "epoch": 46.01, "learning_rate": 6.464646464646465e-07, "loss": 0.0001, "step": 2590 }, { "epoch": 46.01, "learning_rate": 6.060606060606061e-07, "loss": 0.0001, "step": 2600 }, { "epoch": 46.01, "learning_rate": 5.656565656565658e-07, "loss": 0.0001, "step": 2610 }, { "epoch": 46.02, "learning_rate": 5.252525252525253e-07, "loss": 0.0001, "step": 2620 }, { "epoch": 46.02, "learning_rate": 4.848484848484849e-07, "loss": 0.0001, "step": 2630 }, { "epoch": 46.02, "eval_accuracy": 0.72, "eval_loss": 1.3894717693328857, "eval_runtime": 5.0324, "eval_samples_per_second": 4.968, "eval_steps_per_second": 1.391, "step": 2632 }, { "epoch": 47.0, "learning_rate": 4.444444444444445e-07, "loss": 0.0001, "step": 2640 }, { "epoch": 47.01, "learning_rate": 4.040404040404041e-07, "loss": 0.0001, "step": 2650 }, { "epoch": 47.01, "learning_rate": 3.6363636363636366e-07, "loss": 0.0004, "step": 2660 }, { "epoch": 47.01, "learning_rate": 3.2323232323232327e-07, "loss": 0.0001, "step": 2670 }, { "epoch": 47.02, "learning_rate": 2.828282828282829e-07, "loss": 0.0025, "step": 2680 }, { "epoch": 47.02, "eval_accuracy": 0.76, "eval_loss": 1.482791543006897, "eval_runtime": 4.9577, "eval_samples_per_second": 5.043, "eval_steps_per_second": 1.412, "step": 2688 }, { "epoch": 48.0, "learning_rate": 2.4242424242424244e-07, "loss": 0.0001, "step": 2690 }, { "epoch": 48.0, "learning_rate": 2.0202020202020205e-07, "loss": 0.0001, "step": 2700 }, { "epoch": 48.01, "learning_rate": 1.6161616161616163e-07, "loss": 0.0002, "step": 2710 }, { "epoch": 48.01, "learning_rate": 1.2121212121212122e-07, "loss": 0.0001, "step": 2720 }, { "epoch": 48.02, "learning_rate": 8.080808080808082e-08, "loss": 0.0001, "step": 2730 }, { "epoch": 48.02, "learning_rate": 4.040404040404041e-08, "loss": 0.0001, "step": 2740 }, { "epoch": 48.02, "eval_accuracy": 0.76, "eval_loss": 1.4091929197311401, "eval_runtime": 4.9307, "eval_samples_per_second": 5.07, "eval_steps_per_second": 1.42, "step": 2744 }, { "epoch": 49.0, "learning_rate": 0.0, "loss": 0.2131, "step": 2750 }, { "epoch": 49.0, "eval_accuracy": 0.76, "eval_loss": 1.4071288108825684, "eval_runtime": 5.1054, "eval_samples_per_second": 4.897, "eval_steps_per_second": 1.371, "step": 2750 }, { "epoch": 49.0, "step": 2750, "total_flos": 4.808612917881491e+19, "train_loss": 0.16666692824898796, "train_runtime": 5075.4297, "train_samples_per_second": 2.167, "train_steps_per_second": 0.542 }, { "epoch": 49.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 1.2706124782562256, "eval_runtime": 6.9128, "eval_samples_per_second": 4.34, "eval_steps_per_second": 1.157, "step": 2750 }, { "epoch": 49.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 1.2706125974655151, "eval_runtime": 6.4418, "eval_samples_per_second": 4.657, "eval_steps_per_second": 1.242, "step": 2750 } ], "logging_steps": 10, "max_steps": 2750, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 4.808612917881491e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }