{ "best_metric": 0.728110599078341, "best_model_checkpoint": "videomae-base-finetuned-subset-check10/checkpoint-1960", "epoch": 39.016216216216215, "eval_steps": 500, "global_step": 2220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.504504504504505e-07, "loss": 1.5494, "step": 10 }, { "epoch": 0.01, "learning_rate": 9.00900900900901e-07, "loss": 1.5329, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.3513513513513515e-06, "loss": 1.5277, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.801801801801802e-06, "loss": 1.5543, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.2522522522522524e-06, "loss": 1.5175, "step": 50 }, { "epoch": 0.03, "eval_accuracy": 0.2073732718894009, "eval_loss": 1.6040809154510498, "eval_runtime": 183.7832, "eval_samples_per_second": 1.181, "eval_steps_per_second": 0.152, "step": 56 }, { "epoch": 1.0, "learning_rate": 2.702702702702703e-06, "loss": 1.4668, "step": 60 }, { "epoch": 1.01, "learning_rate": 3.1531531531531532e-06, "loss": 1.4494, "step": 70 }, { "epoch": 1.01, "learning_rate": 3.603603603603604e-06, "loss": 1.5305, "step": 80 }, { "epoch": 1.02, "learning_rate": 4.0540540540540545e-06, "loss": 1.4669, "step": 90 }, { "epoch": 1.02, "learning_rate": 4.504504504504505e-06, "loss": 1.4467, "step": 100 }, { "epoch": 1.02, "learning_rate": 4.954954954954955e-06, "loss": 1.4397, "step": 110 }, { "epoch": 1.03, "eval_accuracy": 0.3870967741935484, "eval_loss": 1.4559013843536377, "eval_runtime": 165.5917, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.169, "step": 112 }, { "epoch": 2.0, "learning_rate": 5.405405405405406e-06, "loss": 1.3919, "step": 120 }, { "epoch": 2.01, "learning_rate": 5.855855855855856e-06, "loss": 1.4257, "step": 130 }, { "epoch": 2.01, "learning_rate": 6.3063063063063065e-06, "loss": 1.402, "step": 140 }, { "epoch": 2.02, "learning_rate": 6.7567567567567575e-06, "loss": 1.3609, "step": 150 }, { "epoch": 2.02, "learning_rate": 7.207207207207208e-06, "loss": 1.464, "step": 160 }, { "epoch": 2.03, "eval_accuracy": 0.39631336405529954, "eval_loss": 1.3637099266052246, "eval_runtime": 165.2932, "eval_samples_per_second": 1.313, "eval_steps_per_second": 0.169, "step": 168 }, { "epoch": 3.0, "learning_rate": 7.657657657657658e-06, "loss": 1.3384, "step": 170 }, { "epoch": 3.01, "learning_rate": 8.108108108108109e-06, "loss": 1.4317, "step": 180 }, { "epoch": 3.01, "learning_rate": 8.55855855855856e-06, "loss": 1.1669, "step": 190 }, { "epoch": 3.01, "learning_rate": 9.00900900900901e-06, "loss": 1.4449, "step": 200 }, { "epoch": 3.02, "learning_rate": 9.45945945945946e-06, "loss": 1.3429, "step": 210 }, { "epoch": 3.02, "learning_rate": 9.90990990990991e-06, "loss": 1.3404, "step": 220 }, { "epoch": 3.03, "eval_accuracy": 0.4470046082949309, "eval_loss": 1.2467495203018188, "eval_runtime": 166.7691, "eval_samples_per_second": 1.301, "eval_steps_per_second": 0.168, "step": 224 }, { "epoch": 4.0, "learning_rate": 9.95995995995996e-06, "loss": 1.2682, "step": 230 }, { "epoch": 4.01, "learning_rate": 9.90990990990991e-06, "loss": 1.2307, "step": 240 }, { "epoch": 4.01, "learning_rate": 9.85985985985986e-06, "loss": 1.14, "step": 250 }, { "epoch": 4.02, "learning_rate": 9.80980980980981e-06, "loss": 1.091, "step": 260 }, { "epoch": 4.02, "learning_rate": 9.75975975975976e-06, "loss": 1.1822, "step": 270 }, { "epoch": 4.03, "learning_rate": 9.70970970970971e-06, "loss": 1.3284, "step": 280 }, { "epoch": 4.03, "eval_accuracy": 0.3317972350230415, "eval_loss": 1.3115123510360718, "eval_runtime": 165.1237, "eval_samples_per_second": 1.314, "eval_steps_per_second": 0.17, "step": 280 }, { "epoch": 5.0, "learning_rate": 9.65965965965966e-06, "loss": 1.1944, "step": 290 }, { "epoch": 5.01, "learning_rate": 9.60960960960961e-06, "loss": 1.3055, "step": 300 }, { "epoch": 5.01, "learning_rate": 9.55955955955956e-06, "loss": 1.0722, "step": 310 }, { "epoch": 5.02, "learning_rate": 9.50950950950951e-06, "loss": 1.0467, "step": 320 }, { "epoch": 5.02, "learning_rate": 9.45945945945946e-06, "loss": 1.1598, "step": 330 }, { "epoch": 5.03, "eval_accuracy": 0.4470046082949309, "eval_loss": 1.2488903999328613, "eval_runtime": 168.3906, "eval_samples_per_second": 1.289, "eval_steps_per_second": 0.166, "step": 336 }, { "epoch": 6.0, "learning_rate": 9.40940940940941e-06, "loss": 1.2285, "step": 340 }, { "epoch": 6.01, "learning_rate": 9.35935935935936e-06, "loss": 1.0002, "step": 350 }, { "epoch": 6.01, "learning_rate": 9.30930930930931e-06, "loss": 1.2506, "step": 360 }, { "epoch": 6.02, "learning_rate": 9.25925925925926e-06, "loss": 1.0963, "step": 370 }, { "epoch": 6.02, "learning_rate": 9.20920920920921e-06, "loss": 1.1495, "step": 380 }, { "epoch": 6.02, "learning_rate": 9.15915915915916e-06, "loss": 0.9615, "step": 390 }, { "epoch": 6.03, "eval_accuracy": 0.4009216589861751, "eval_loss": 1.3057225942611694, "eval_runtime": 165.4477, "eval_samples_per_second": 1.312, "eval_steps_per_second": 0.169, "step": 392 }, { "epoch": 7.0, "learning_rate": 9.10910910910911e-06, "loss": 1.0211, "step": 400 }, { "epoch": 7.01, "learning_rate": 9.05905905905906e-06, "loss": 1.0622, "step": 410 }, { "epoch": 7.01, "learning_rate": 9.00900900900901e-06, "loss": 1.1759, "step": 420 }, { "epoch": 7.02, "learning_rate": 8.95895895895896e-06, "loss": 1.0414, "step": 430 }, { "epoch": 7.02, "learning_rate": 8.90890890890891e-06, "loss": 0.9357, "step": 440 }, { "epoch": 7.03, "eval_accuracy": 0.6497695852534562, "eval_loss": 0.9200783967971802, "eval_runtime": 165.3979, "eval_samples_per_second": 1.312, "eval_steps_per_second": 0.169, "step": 448 }, { "epoch": 8.0, "learning_rate": 8.85885885885886e-06, "loss": 1.2506, "step": 450 }, { "epoch": 8.01, "learning_rate": 8.80880880880881e-06, "loss": 1.0342, "step": 460 }, { "epoch": 8.01, "learning_rate": 8.75875875875876e-06, "loss": 1.0815, "step": 470 }, { "epoch": 8.01, "learning_rate": 8.70870870870871e-06, "loss": 1.0373, "step": 480 }, { "epoch": 8.02, "learning_rate": 8.65865865865866e-06, "loss": 0.9051, "step": 490 }, { "epoch": 8.02, "learning_rate": 8.60860860860861e-06, "loss": 0.9785, "step": 500 }, { "epoch": 8.03, "eval_accuracy": 0.6774193548387096, "eval_loss": 0.8629115223884583, "eval_runtime": 165.2531, "eval_samples_per_second": 1.313, "eval_steps_per_second": 0.169, "step": 504 }, { "epoch": 9.0, "learning_rate": 8.55855855855856e-06, "loss": 0.9915, "step": 510 }, { "epoch": 9.01, "learning_rate": 8.50850850850851e-06, "loss": 0.9176, "step": 520 }, { "epoch": 9.01, "learning_rate": 8.45845845845846e-06, "loss": 1.1561, "step": 530 }, { "epoch": 9.02, "learning_rate": 8.408408408408409e-06, "loss": 0.8966, "step": 540 }, { "epoch": 9.02, "learning_rate": 8.358358358358359e-06, "loss": 0.894, "step": 550 }, { "epoch": 9.03, "learning_rate": 8.308308308308309e-06, "loss": 1.0862, "step": 560 }, { "epoch": 9.03, "eval_accuracy": 0.5069124423963134, "eval_loss": 1.0976766347885132, "eval_runtime": 165.1522, "eval_samples_per_second": 1.314, "eval_steps_per_second": 0.17, "step": 560 }, { "epoch": 10.0, "learning_rate": 8.258258258258259e-06, "loss": 0.7806, "step": 570 }, { "epoch": 10.01, "learning_rate": 8.208208208208209e-06, "loss": 0.9904, "step": 580 }, { "epoch": 10.01, "learning_rate": 8.158158158158159e-06, "loss": 0.9099, "step": 590 }, { "epoch": 10.02, "learning_rate": 8.108108108108109e-06, "loss": 0.9169, "step": 600 }, { "epoch": 10.02, "learning_rate": 8.058058058058059e-06, "loss": 0.9315, "step": 610 }, { "epoch": 10.03, "eval_accuracy": 0.7096774193548387, "eval_loss": 0.7867635488510132, "eval_runtime": 165.0158, "eval_samples_per_second": 1.315, "eval_steps_per_second": 0.17, "step": 616 }, { "epoch": 11.0, "learning_rate": 8.00800800800801e-06, "loss": 0.7636, "step": 620 }, { "epoch": 11.01, "learning_rate": 7.95795795795796e-06, "loss": 0.9198, "step": 630 }, { "epoch": 11.01, "learning_rate": 7.90790790790791e-06, "loss": 1.0816, "step": 640 }, { "epoch": 11.02, "learning_rate": 7.85785785785786e-06, "loss": 0.8409, "step": 650 }, { "epoch": 11.02, "learning_rate": 7.807807807807808e-06, "loss": 0.7348, "step": 660 }, { "epoch": 11.02, "learning_rate": 7.757757757757758e-06, "loss": 0.9404, "step": 670 }, { "epoch": 11.03, "eval_accuracy": 0.6728110599078341, "eval_loss": 0.8170290589332581, "eval_runtime": 165.2251, "eval_samples_per_second": 1.313, "eval_steps_per_second": 0.169, "step": 672 }, { "epoch": 12.0, "learning_rate": 7.707707707707708e-06, "loss": 0.8669, "step": 680 }, { "epoch": 12.01, "learning_rate": 7.657657657657658e-06, "loss": 0.7773, "step": 690 }, { "epoch": 12.01, "learning_rate": 7.607607607607608e-06, "loss": 0.7599, "step": 700 }, { "epoch": 12.02, "learning_rate": 7.557557557557558e-06, "loss": 0.9216, "step": 710 }, { "epoch": 12.02, "learning_rate": 7.507507507507507e-06, "loss": 0.939, "step": 720 }, { "epoch": 12.03, "eval_accuracy": 0.663594470046083, "eval_loss": 0.9246166348457336, "eval_runtime": 165.7726, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.169, "step": 728 }, { "epoch": 13.0, "learning_rate": 7.457457457457457e-06, "loss": 0.9703, "step": 730 }, { "epoch": 13.01, "learning_rate": 7.4074074074074075e-06, "loss": 0.6688, "step": 740 }, { "epoch": 13.01, "learning_rate": 7.3573573573573575e-06, "loss": 0.9097, "step": 750 }, { "epoch": 13.01, "learning_rate": 7.307307307307308e-06, "loss": 0.9963, "step": 760 }, { "epoch": 13.02, "learning_rate": 7.257257257257258e-06, "loss": 0.8594, "step": 770 }, { "epoch": 13.02, "learning_rate": 7.207207207207208e-06, "loss": 0.8205, "step": 780 }, { "epoch": 13.03, "eval_accuracy": 0.6866359447004609, "eval_loss": 0.8420272469520569, "eval_runtime": 165.0282, "eval_samples_per_second": 1.315, "eval_steps_per_second": 0.17, "step": 784 }, { "epoch": 14.0, "learning_rate": 7.157157157157158e-06, "loss": 0.8206, "step": 790 }, { "epoch": 14.01, "learning_rate": 7.107107107107107e-06, "loss": 0.8183, "step": 800 }, { "epoch": 14.01, "learning_rate": 7.057057057057057e-06, "loss": 0.7596, "step": 810 }, { "epoch": 14.02, "learning_rate": 7.007007007007007e-06, "loss": 0.799, "step": 820 }, { "epoch": 14.02, "learning_rate": 6.956956956956957e-06, "loss": 0.7377, "step": 830 }, { "epoch": 14.03, "learning_rate": 6.906906906906907e-06, "loss": 0.6719, "step": 840 }, { "epoch": 14.03, "eval_accuracy": 0.5898617511520737, "eval_loss": 1.07249116897583, "eval_runtime": 164.1063, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.171, "step": 840 }, { "epoch": 15.0, "learning_rate": 6.856856856856857e-06, "loss": 0.6874, "step": 850 }, { "epoch": 15.01, "learning_rate": 6.8068068068068075e-06, "loss": 0.8207, "step": 860 }, { "epoch": 15.01, "learning_rate": 6.7567567567567575e-06, "loss": 0.7454, "step": 870 }, { "epoch": 15.02, "learning_rate": 6.706706706706707e-06, "loss": 0.7188, "step": 880 }, { "epoch": 15.02, "learning_rate": 6.656656656656657e-06, "loss": 0.8308, "step": 890 }, { "epoch": 15.03, "eval_accuracy": 0.6912442396313364, "eval_loss": 0.8682537078857422, "eval_runtime": 165.7327, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.169, "step": 896 }, { "epoch": 16.0, "learning_rate": 6.606606606606607e-06, "loss": 0.891, "step": 900 }, { "epoch": 16.01, "learning_rate": 6.556556556556557e-06, "loss": 0.7597, "step": 910 }, { "epoch": 16.01, "learning_rate": 6.506506506506507e-06, "loss": 0.8318, "step": 920 }, { "epoch": 16.02, "learning_rate": 6.456456456456457e-06, "loss": 0.7681, "step": 930 }, { "epoch": 16.02, "learning_rate": 6.406406406406407e-06, "loss": 1.0094, "step": 940 }, { "epoch": 16.02, "learning_rate": 6.356356356356357e-06, "loss": 0.7554, "step": 950 }, { "epoch": 16.03, "eval_accuracy": 0.5990783410138248, "eval_loss": 0.9684067368507385, "eval_runtime": 164.5634, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.17, "step": 952 }, { "epoch": 17.0, "learning_rate": 6.3063063063063065e-06, "loss": 0.9087, "step": 960 }, { "epoch": 17.01, "learning_rate": 6.2562562562562565e-06, "loss": 0.7401, "step": 970 }, { "epoch": 17.01, "learning_rate": 6.206206206206207e-06, "loss": 0.7611, "step": 980 }, { "epoch": 17.02, "learning_rate": 6.156156156156157e-06, "loss": 0.6347, "step": 990 }, { "epoch": 17.02, "learning_rate": 6.106106106106107e-06, "loss": 0.6962, "step": 1000 }, { "epoch": 17.03, "eval_accuracy": 0.5483870967741935, "eval_loss": 1.1106446981430054, "eval_runtime": 165.0022, "eval_samples_per_second": 1.315, "eval_steps_per_second": 0.17, "step": 1008 }, { "epoch": 18.0, "learning_rate": 6.056056056056057e-06, "loss": 0.6861, "step": 1010 }, { "epoch": 18.01, "learning_rate": 6.006006006006007e-06, "loss": 0.7999, "step": 1020 }, { "epoch": 18.01, "learning_rate": 5.955955955955957e-06, "loss": 0.7412, "step": 1030 }, { "epoch": 18.01, "learning_rate": 5.905905905905906e-06, "loss": 0.8145, "step": 1040 }, { "epoch": 18.02, "learning_rate": 5.855855855855856e-06, "loss": 0.6653, "step": 1050 }, { "epoch": 18.02, "learning_rate": 5.805805805805806e-06, "loss": 0.7995, "step": 1060 }, { "epoch": 18.03, "eval_accuracy": 0.6497695852534562, "eval_loss": 0.9750950336456299, "eval_runtime": 163.7137, "eval_samples_per_second": 1.325, "eval_steps_per_second": 0.171, "step": 1064 }, { "epoch": 19.0, "learning_rate": 5.755755755755756e-06, "loss": 0.6923, "step": 1070 }, { "epoch": 19.01, "learning_rate": 5.7057057057057065e-06, "loss": 0.7874, "step": 1080 }, { "epoch": 19.01, "learning_rate": 5.6556556556556565e-06, "loss": 0.728, "step": 1090 }, { "epoch": 19.02, "learning_rate": 5.605605605605607e-06, "loss": 0.6329, "step": 1100 }, { "epoch": 19.02, "learning_rate": 5.555555555555557e-06, "loss": 0.8939, "step": 1110 }, { "epoch": 19.03, "learning_rate": 5.505505505505506e-06, "loss": 0.8298, "step": 1120 }, { "epoch": 19.03, "eval_accuracy": 0.5299539170506913, "eval_loss": 1.0630956888198853, "eval_runtime": 165.0711, "eval_samples_per_second": 1.315, "eval_steps_per_second": 0.17, "step": 1120 }, { "epoch": 20.0, "learning_rate": 5.455455455455456e-06, "loss": 0.7025, "step": 1130 }, { "epoch": 20.01, "learning_rate": 5.405405405405406e-06, "loss": 0.6251, "step": 1140 }, { "epoch": 20.01, "learning_rate": 5.355355355355356e-06, "loss": 0.7818, "step": 1150 }, { "epoch": 20.02, "learning_rate": 5.305305305305306e-06, "loss": 0.8259, "step": 1160 }, { "epoch": 20.02, "learning_rate": 5.255255255255256e-06, "loss": 0.6607, "step": 1170 }, { "epoch": 20.03, "eval_accuracy": 0.6175115207373272, "eval_loss": 0.9457883834838867, "eval_runtime": 164.6389, "eval_samples_per_second": 1.318, "eval_steps_per_second": 0.17, "step": 1176 }, { "epoch": 21.0, "learning_rate": 5.205205205205206e-06, "loss": 0.6344, "step": 1180 }, { "epoch": 21.01, "learning_rate": 5.155155155155156e-06, "loss": 0.637, "step": 1190 }, { "epoch": 21.01, "learning_rate": 5.105105105105106e-06, "loss": 0.858, "step": 1200 }, { "epoch": 21.02, "learning_rate": 5.055055055055056e-06, "loss": 0.6934, "step": 1210 }, { "epoch": 21.02, "learning_rate": 5.005005005005006e-06, "loss": 0.5714, "step": 1220 }, { "epoch": 21.02, "learning_rate": 4.954954954954955e-06, "loss": 0.688, "step": 1230 }, { "epoch": 21.03, "eval_accuracy": 0.6036866359447005, "eval_loss": 1.029563546180725, "eval_runtime": 163.856, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.171, "step": 1232 }, { "epoch": 22.0, "learning_rate": 4.904904904904905e-06, "loss": 0.5114, "step": 1240 }, { "epoch": 22.01, "learning_rate": 4.854854854854855e-06, "loss": 0.6638, "step": 1250 }, { "epoch": 22.01, "learning_rate": 4.804804804804805e-06, "loss": 0.7833, "step": 1260 }, { "epoch": 22.02, "learning_rate": 4.754754754754755e-06, "loss": 0.7556, "step": 1270 }, { "epoch": 22.02, "learning_rate": 4.704704704704705e-06, "loss": 0.5835, "step": 1280 }, { "epoch": 22.03, "eval_accuracy": 0.6774193548387096, "eval_loss": 0.8947747349739075, "eval_runtime": 163.875, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.171, "step": 1288 }, { "epoch": 23.0, "learning_rate": 4.654654654654655e-06, "loss": 0.7209, "step": 1290 }, { "epoch": 23.01, "learning_rate": 4.604604604604605e-06, "loss": 0.6087, "step": 1300 }, { "epoch": 23.01, "learning_rate": 4.554554554554555e-06, "loss": 0.6529, "step": 1310 }, { "epoch": 23.01, "learning_rate": 4.504504504504505e-06, "loss": 0.7221, "step": 1320 }, { "epoch": 23.02, "learning_rate": 4.454454454454455e-06, "loss": 0.6167, "step": 1330 }, { "epoch": 23.02, "learning_rate": 4.404404404404405e-06, "loss": 0.6987, "step": 1340 }, { "epoch": 23.03, "eval_accuracy": 0.7188940092165899, "eval_loss": 0.7882533669471741, "eval_runtime": 163.4201, "eval_samples_per_second": 1.328, "eval_steps_per_second": 0.171, "step": 1344 }, { "epoch": 24.0, "learning_rate": 4.354354354354355e-06, "loss": 0.7234, "step": 1350 }, { "epoch": 24.01, "learning_rate": 4.304304304304305e-06, "loss": 0.808, "step": 1360 }, { "epoch": 24.01, "learning_rate": 4.254254254254255e-06, "loss": 0.6323, "step": 1370 }, { "epoch": 24.02, "learning_rate": 4.204204204204204e-06, "loss": 0.5463, "step": 1380 }, { "epoch": 24.02, "learning_rate": 4.154154154154154e-06, "loss": 0.7978, "step": 1390 }, { "epoch": 24.03, "learning_rate": 4.1041041041041045e-06, "loss": 0.4979, "step": 1400 }, { "epoch": 24.03, "eval_accuracy": 0.7188940092165899, "eval_loss": 0.7089133262634277, "eval_runtime": 164.3791, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.17, "step": 1400 }, { "epoch": 25.0, "learning_rate": 4.0540540540540545e-06, "loss": 0.4685, "step": 1410 }, { "epoch": 25.01, "learning_rate": 4.004004004004005e-06, "loss": 0.665, "step": 1420 }, { "epoch": 25.01, "learning_rate": 3.953953953953955e-06, "loss": 0.6872, "step": 1430 }, { "epoch": 25.02, "learning_rate": 3.903903903903904e-06, "loss": 0.5854, "step": 1440 }, { "epoch": 25.02, "learning_rate": 3.853853853853854e-06, "loss": 0.6163, "step": 1450 }, { "epoch": 25.03, "eval_accuracy": 0.7235023041474654, "eval_loss": 0.7633541226387024, "eval_runtime": 163.5735, "eval_samples_per_second": 1.327, "eval_steps_per_second": 0.171, "step": 1456 }, { "epoch": 26.0, "learning_rate": 3.803803803803804e-06, "loss": 0.7065, "step": 1460 }, { "epoch": 26.01, "learning_rate": 3.7537537537537537e-06, "loss": 0.6684, "step": 1470 }, { "epoch": 26.01, "learning_rate": 3.7037037037037037e-06, "loss": 0.786, "step": 1480 }, { "epoch": 26.02, "learning_rate": 3.653653653653654e-06, "loss": 0.437, "step": 1490 }, { "epoch": 26.02, "learning_rate": 3.603603603603604e-06, "loss": 0.6708, "step": 1500 }, { "epoch": 26.02, "learning_rate": 3.5535535535535535e-06, "loss": 0.6754, "step": 1510 }, { "epoch": 26.03, "eval_accuracy": 0.6359447004608295, "eval_loss": 0.9443588852882385, "eval_runtime": 163.9867, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.171, "step": 1512 }, { "epoch": 27.0, "learning_rate": 3.5035035035035036e-06, "loss": 0.7284, "step": 1520 }, { "epoch": 27.01, "learning_rate": 3.4534534534534537e-06, "loss": 0.5413, "step": 1530 }, { "epoch": 27.01, "learning_rate": 3.4034034034034037e-06, "loss": 0.6884, "step": 1540 }, { "epoch": 27.02, "learning_rate": 3.3533533533533534e-06, "loss": 0.5541, "step": 1550 }, { "epoch": 27.02, "learning_rate": 3.3033033033033035e-06, "loss": 0.6673, "step": 1560 }, { "epoch": 27.03, "eval_accuracy": 0.6543778801843319, "eval_loss": 0.8390823602676392, "eval_runtime": 164.572, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.17, "step": 1568 }, { "epoch": 28.0, "learning_rate": 3.2532532532532535e-06, "loss": 0.6895, "step": 1570 }, { "epoch": 28.01, "learning_rate": 3.2032032032032036e-06, "loss": 0.553, "step": 1580 }, { "epoch": 28.01, "learning_rate": 3.1531531531531532e-06, "loss": 0.4311, "step": 1590 }, { "epoch": 28.01, "learning_rate": 3.1031031031031033e-06, "loss": 0.6969, "step": 1600 }, { "epoch": 28.02, "learning_rate": 3.0530530530530534e-06, "loss": 0.7507, "step": 1610 }, { "epoch": 28.02, "learning_rate": 3.0030030030030034e-06, "loss": 0.4924, "step": 1620 }, { "epoch": 28.03, "eval_accuracy": 0.6682027649769585, "eval_loss": 0.8288503885269165, "eval_runtime": 164.6437, "eval_samples_per_second": 1.318, "eval_steps_per_second": 0.17, "step": 1624 }, { "epoch": 29.0, "learning_rate": 2.952952952952953e-06, "loss": 0.7695, "step": 1630 }, { "epoch": 29.01, "learning_rate": 2.902902902902903e-06, "loss": 0.5981, "step": 1640 }, { "epoch": 29.01, "learning_rate": 2.8528528528528532e-06, "loss": 0.613, "step": 1650 }, { "epoch": 29.02, "learning_rate": 2.8028028028028033e-06, "loss": 0.5841, "step": 1660 }, { "epoch": 29.02, "learning_rate": 2.752752752752753e-06, "loss": 0.7704, "step": 1670 }, { "epoch": 29.03, "learning_rate": 2.702702702702703e-06, "loss": 0.6438, "step": 1680 }, { "epoch": 29.03, "eval_accuracy": 0.6129032258064516, "eval_loss": 0.9605286121368408, "eval_runtime": 164.5062, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.17, "step": 1680 }, { "epoch": 30.0, "learning_rate": 2.652652652652653e-06, "loss": 0.583, "step": 1690 }, { "epoch": 30.01, "learning_rate": 2.602602602602603e-06, "loss": 0.7539, "step": 1700 }, { "epoch": 30.01, "learning_rate": 2.552552552552553e-06, "loss": 0.5418, "step": 1710 }, { "epoch": 30.02, "learning_rate": 2.502502502502503e-06, "loss": 0.6689, "step": 1720 }, { "epoch": 30.02, "learning_rate": 2.4524524524524525e-06, "loss": 0.5714, "step": 1730 }, { "epoch": 30.03, "eval_accuracy": 0.6451612903225806, "eval_loss": 0.8838080763816833, "eval_runtime": 163.4686, "eval_samples_per_second": 1.327, "eval_steps_per_second": 0.171, "step": 1736 }, { "epoch": 31.0, "learning_rate": 2.4024024024024026e-06, "loss": 0.4737, "step": 1740 }, { "epoch": 31.01, "learning_rate": 2.3523523523523527e-06, "loss": 0.6563, "step": 1750 }, { "epoch": 31.01, "learning_rate": 2.3023023023023023e-06, "loss": 0.4469, "step": 1760 }, { "epoch": 31.02, "learning_rate": 2.2522522522522524e-06, "loss": 0.5655, "step": 1770 }, { "epoch": 31.02, "learning_rate": 2.2022022022022024e-06, "loss": 0.5421, "step": 1780 }, { "epoch": 31.02, "learning_rate": 2.1521521521521525e-06, "loss": 0.6726, "step": 1790 }, { "epoch": 31.03, "eval_accuracy": 0.6589861751152074, "eval_loss": 0.8412278890609741, "eval_runtime": 164.9282, "eval_samples_per_second": 1.316, "eval_steps_per_second": 0.17, "step": 1792 }, { "epoch": 32.0, "learning_rate": 2.102102102102102e-06, "loss": 0.7254, "step": 1800 }, { "epoch": 32.01, "learning_rate": 2.0520520520520522e-06, "loss": 0.5397, "step": 1810 }, { "epoch": 32.01, "learning_rate": 2.0020020020020023e-06, "loss": 0.596, "step": 1820 }, { "epoch": 32.02, "learning_rate": 1.951951951951952e-06, "loss": 0.5603, "step": 1830 }, { "epoch": 32.02, "learning_rate": 1.901901901901902e-06, "loss": 0.5027, "step": 1840 }, { "epoch": 32.03, "eval_accuracy": 0.6728110599078341, "eval_loss": 0.8439391255378723, "eval_runtime": 168.1229, "eval_samples_per_second": 1.291, "eval_steps_per_second": 0.167, "step": 1848 }, { "epoch": 33.0, "learning_rate": 1.8518518518518519e-06, "loss": 0.6769, "step": 1850 }, { "epoch": 33.01, "learning_rate": 1.801801801801802e-06, "loss": 0.565, "step": 1860 }, { "epoch": 33.01, "learning_rate": 1.7517517517517518e-06, "loss": 0.556, "step": 1870 }, { "epoch": 33.01, "learning_rate": 1.7017017017017019e-06, "loss": 0.4187, "step": 1880 }, { "epoch": 33.02, "learning_rate": 1.6516516516516517e-06, "loss": 0.4718, "step": 1890 }, { "epoch": 33.02, "learning_rate": 1.6016016016016018e-06, "loss": 0.4649, "step": 1900 }, { "epoch": 33.03, "eval_accuracy": 0.6267281105990783, "eval_loss": 0.9524617791175842, "eval_runtime": 169.6407, "eval_samples_per_second": 1.279, "eval_steps_per_second": 0.165, "step": 1904 }, { "epoch": 34.0, "learning_rate": 1.5515515515515517e-06, "loss": 0.4505, "step": 1910 }, { "epoch": 34.01, "learning_rate": 1.5015015015015017e-06, "loss": 0.5412, "step": 1920 }, { "epoch": 34.01, "learning_rate": 1.4514514514514516e-06, "loss": 0.5931, "step": 1930 }, { "epoch": 34.02, "learning_rate": 1.4014014014014016e-06, "loss": 0.4183, "step": 1940 }, { "epoch": 34.02, "learning_rate": 1.3513513513513515e-06, "loss": 0.6046, "step": 1950 }, { "epoch": 34.03, "learning_rate": 1.3013013013013016e-06, "loss": 0.6625, "step": 1960 }, { "epoch": 34.03, "eval_accuracy": 0.728110599078341, "eval_loss": 0.7850246429443359, "eval_runtime": 164.4991, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.17, "step": 1960 }, { "epoch": 35.0, "learning_rate": 1.2512512512512514e-06, "loss": 0.5967, "step": 1970 }, { "epoch": 35.01, "learning_rate": 1.2012012012012013e-06, "loss": 0.5503, "step": 1980 }, { "epoch": 35.01, "learning_rate": 1.1511511511511512e-06, "loss": 0.5159, "step": 1990 }, { "epoch": 35.02, "learning_rate": 1.1011011011011012e-06, "loss": 0.4377, "step": 2000 }, { "epoch": 35.02, "learning_rate": 1.051051051051051e-06, "loss": 0.5793, "step": 2010 }, { "epoch": 35.03, "eval_accuracy": 0.6728110599078341, "eval_loss": 0.8481199741363525, "eval_runtime": 164.3359, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.17, "step": 2016 }, { "epoch": 36.0, "learning_rate": 1.0010010010010011e-06, "loss": 0.7009, "step": 2020 }, { "epoch": 36.01, "learning_rate": 9.50950950950951e-07, "loss": 0.5902, "step": 2030 }, { "epoch": 36.01, "learning_rate": 9.00900900900901e-07, "loss": 0.5366, "step": 2040 }, { "epoch": 36.02, "learning_rate": 8.508508508508509e-07, "loss": 0.5119, "step": 2050 }, { "epoch": 36.02, "learning_rate": 8.008008008008009e-07, "loss": 0.4946, "step": 2060 }, { "epoch": 36.02, "learning_rate": 7.507507507507509e-07, "loss": 0.6411, "step": 2070 }, { "epoch": 36.03, "eval_accuracy": 0.6589861751152074, "eval_loss": 0.8842198252677917, "eval_runtime": 164.6218, "eval_samples_per_second": 1.318, "eval_steps_per_second": 0.17, "step": 2072 }, { "epoch": 37.0, "learning_rate": 7.007007007007008e-07, "loss": 0.5172, "step": 2080 }, { "epoch": 37.01, "learning_rate": 6.506506506506508e-07, "loss": 0.3986, "step": 2090 }, { "epoch": 37.01, "learning_rate": 6.006006006006006e-07, "loss": 0.4807, "step": 2100 }, { "epoch": 37.02, "learning_rate": 5.505505505505506e-07, "loss": 0.4681, "step": 2110 }, { "epoch": 37.02, "learning_rate": 5.005005005005006e-07, "loss": 0.6592, "step": 2120 }, { "epoch": 37.03, "eval_accuracy": 0.6912442396313364, "eval_loss": 0.802787184715271, "eval_runtime": 163.9059, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.171, "step": 2128 }, { "epoch": 38.0, "learning_rate": 4.504504504504505e-07, "loss": 0.5769, "step": 2130 }, { "epoch": 38.01, "learning_rate": 4.0040040040040045e-07, "loss": 0.6323, "step": 2140 }, { "epoch": 38.01, "learning_rate": 3.503503503503504e-07, "loss": 0.5319, "step": 2150 }, { "epoch": 38.01, "learning_rate": 3.003003003003003e-07, "loss": 0.6306, "step": 2160 }, { "epoch": 38.02, "learning_rate": 2.502502502502503e-07, "loss": 0.6192, "step": 2170 }, { "epoch": 38.02, "learning_rate": 2.0020020020020022e-07, "loss": 0.5524, "step": 2180 }, { "epoch": 38.03, "eval_accuracy": 0.6866359447004609, "eval_loss": 0.821592390537262, "eval_runtime": 165.1926, "eval_samples_per_second": 1.314, "eval_steps_per_second": 0.169, "step": 2184 }, { "epoch": 39.0, "learning_rate": 1.5015015015015016e-07, "loss": 0.5701, "step": 2190 }, { "epoch": 39.01, "learning_rate": 1.0010010010010011e-07, "loss": 0.5891, "step": 2200 }, { "epoch": 39.01, "learning_rate": 5.0050050050050056e-08, "loss": 0.5807, "step": 2210 }, { "epoch": 39.02, "learning_rate": 0.0, "loss": 0.5697, "step": 2220 }, { "epoch": 39.02, "eval_accuracy": 0.6774193548387096, "eval_loss": 0.833946943283081, "eval_runtime": 164.2083, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.171, "step": 2220 }, { "epoch": 39.02, "step": 2220, "total_flos": 2.208334746599719e+19, "train_loss": 0.8170982454274152, "train_runtime": 23121.8467, "train_samples_per_second": 0.768, "train_steps_per_second": 0.096 }, { "epoch": 39.02, "eval_accuracy": 0.6342592592592593, "eval_loss": 0.8682467341423035, "eval_runtime": 165.599, "eval_samples_per_second": 1.304, "eval_steps_per_second": 0.163, "step": 2220 }, { "epoch": 39.02, "eval_accuracy": 0.6342592592592593, "eval_loss": 0.8682467341423035, "eval_runtime": 165.9338, "eval_samples_per_second": 1.302, "eval_steps_per_second": 0.163, "step": 2220 } ], "logging_steps": 10, "max_steps": 2220, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 2.208334746599719e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }