{ "best_metric": 0.9481739412098146, "best_model_checkpoint": "deit-tiny-patch16-224-finetuned-og-dataset-10e/checkpoint-2184", "epoch": 5.998628257887518, "global_step": 2184, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.2831050228310503e-06, "loss": 1.4902, "step": 10 }, { "epoch": 0.05, "learning_rate": 4.566210045662101e-06, "loss": 1.3426, "step": 20 }, { "epoch": 0.08, "learning_rate": 6.849315068493151e-06, "loss": 1.1594, "step": 30 }, { "epoch": 0.11, "learning_rate": 9.132420091324201e-06, "loss": 0.9353, "step": 40 }, { "epoch": 0.14, "learning_rate": 1.1415525114155251e-05, "loss": 0.8424, "step": 50 }, { "epoch": 0.16, "learning_rate": 1.3698630136986302e-05, "loss": 0.8113, "step": 60 }, { "epoch": 0.19, "learning_rate": 1.5981735159817352e-05, "loss": 0.7902, "step": 70 }, { "epoch": 0.22, "learning_rate": 1.8264840182648402e-05, "loss": 0.7755, "step": 80 }, { "epoch": 0.25, "learning_rate": 2.0547945205479453e-05, "loss": 0.7586, "step": 90 }, { "epoch": 0.27, "learning_rate": 2.2831050228310503e-05, "loss": 0.7285, "step": 100 }, { "epoch": 0.3, "learning_rate": 2.5114155251141553e-05, "loss": 0.7206, "step": 110 }, { "epoch": 0.33, "learning_rate": 2.7397260273972603e-05, "loss": 0.6964, "step": 120 }, { "epoch": 0.36, "learning_rate": 2.9680365296803654e-05, "loss": 0.6705, "step": 130 }, { "epoch": 0.38, "learning_rate": 3.1963470319634704e-05, "loss": 0.6803, "step": 140 }, { "epoch": 0.41, "learning_rate": 3.424657534246575e-05, "loss": 0.6864, "step": 150 }, { "epoch": 0.44, "learning_rate": 3.6529680365296805e-05, "loss": 0.6818, "step": 160 }, { "epoch": 0.47, "learning_rate": 3.881278538812785e-05, "loss": 0.6484, "step": 170 }, { "epoch": 0.49, "learning_rate": 4.1095890410958905e-05, "loss": 0.6527, "step": 180 }, { "epoch": 0.52, "learning_rate": 4.337899543378995e-05, "loss": 0.632, "step": 190 }, { "epoch": 0.55, "learning_rate": 4.5662100456621006e-05, "loss": 0.6276, "step": 200 }, { "epoch": 0.58, "learning_rate": 4.794520547945205e-05, "loss": 0.6604, "step": 210 }, { "epoch": 0.6, "learning_rate": 4.997455470737914e-05, "loss": 0.6407, "step": 220 }, { "epoch": 0.63, "learning_rate": 4.9720101781170486e-05, "loss": 0.6526, "step": 230 }, { "epoch": 0.66, "learning_rate": 4.9465648854961834e-05, "loss": 0.6136, "step": 240 }, { "epoch": 0.69, "learning_rate": 4.921119592875318e-05, "loss": 0.5914, "step": 250 }, { "epoch": 0.71, "learning_rate": 4.895674300254453e-05, "loss": 0.6173, "step": 260 }, { "epoch": 0.74, "learning_rate": 4.8702290076335885e-05, "loss": 0.6036, "step": 270 }, { "epoch": 0.77, "learning_rate": 4.844783715012723e-05, "loss": 0.5949, "step": 280 }, { "epoch": 0.8, "learning_rate": 4.8193384223918575e-05, "loss": 0.5636, "step": 290 }, { "epoch": 0.82, "learning_rate": 4.793893129770993e-05, "loss": 0.557, "step": 300 }, { "epoch": 0.85, "learning_rate": 4.768447837150127e-05, "loss": 0.567, "step": 310 }, { "epoch": 0.88, "learning_rate": 4.743002544529263e-05, "loss": 0.5625, "step": 320 }, { "epoch": 0.91, "learning_rate": 4.717557251908397e-05, "loss": 0.5814, "step": 330 }, { "epoch": 0.93, "learning_rate": 4.6921119592875317e-05, "loss": 0.5825, "step": 340 }, { "epoch": 0.96, "learning_rate": 4.666666666666667e-05, "loss": 0.52, "step": 350 }, { "epoch": 0.99, "learning_rate": 4.641221374045801e-05, "loss": 0.5434, "step": 360 }, { "epoch": 1.0, "eval_accuracy": 0.8003886954409264, "eval_loss": 0.46485939621925354, "eval_runtime": 145.162, "eval_samples_per_second": 85.07, "eval_steps_per_second": 1.777, "step": 364 }, { "epoch": 1.02, "learning_rate": 4.615776081424937e-05, "loss": 0.5464, "step": 370 }, { "epoch": 1.04, "learning_rate": 4.5903307888040716e-05, "loss": 0.5175, "step": 380 }, { "epoch": 1.07, "learning_rate": 4.5648854961832065e-05, "loss": 0.542, "step": 390 }, { "epoch": 1.1, "learning_rate": 4.539440203562341e-05, "loss": 0.5581, "step": 400 }, { "epoch": 1.13, "learning_rate": 4.513994910941476e-05, "loss": 0.5754, "step": 410 }, { "epoch": 1.15, "learning_rate": 4.488549618320611e-05, "loss": 0.5116, "step": 420 }, { "epoch": 1.18, "learning_rate": 4.463104325699746e-05, "loss": 0.5006, "step": 430 }, { "epoch": 1.21, "learning_rate": 4.4376590330788806e-05, "loss": 0.4939, "step": 440 }, { "epoch": 1.24, "learning_rate": 4.4122137404580154e-05, "loss": 0.5006, "step": 450 }, { "epoch": 1.26, "learning_rate": 4.38676844783715e-05, "loss": 0.4861, "step": 460 }, { "epoch": 1.29, "learning_rate": 4.361323155216285e-05, "loss": 0.4626, "step": 470 }, { "epoch": 1.32, "learning_rate": 4.33587786259542e-05, "loss": 0.4723, "step": 480 }, { "epoch": 1.35, "learning_rate": 4.3104325699745554e-05, "loss": 0.4499, "step": 490 }, { "epoch": 1.37, "learning_rate": 4.2849872773536896e-05, "loss": 0.477, "step": 500 }, { "epoch": 1.4, "learning_rate": 4.2595419847328244e-05, "loss": 0.4804, "step": 510 }, { "epoch": 1.43, "learning_rate": 4.23409669211196e-05, "loss": 0.4805, "step": 520 }, { "epoch": 1.46, "learning_rate": 4.208651399491094e-05, "loss": 0.4611, "step": 530 }, { "epoch": 1.48, "learning_rate": 4.1832061068702296e-05, "loss": 0.4573, "step": 540 }, { "epoch": 1.51, "learning_rate": 4.157760814249364e-05, "loss": 0.4532, "step": 550 }, { "epoch": 1.54, "learning_rate": 4.1323155216284985e-05, "loss": 0.4472, "step": 560 }, { "epoch": 1.57, "learning_rate": 4.106870229007634e-05, "loss": 0.4563, "step": 570 }, { "epoch": 1.59, "learning_rate": 4.081424936386768e-05, "loss": 0.4394, "step": 580 }, { "epoch": 1.62, "learning_rate": 4.055979643765904e-05, "loss": 0.4599, "step": 590 }, { "epoch": 1.65, "learning_rate": 4.0305343511450385e-05, "loss": 0.4466, "step": 600 }, { "epoch": 1.67, "learning_rate": 4.0050890585241734e-05, "loss": 0.443, "step": 610 }, { "epoch": 1.7, "learning_rate": 3.979643765903308e-05, "loss": 0.4413, "step": 620 }, { "epoch": 1.73, "learning_rate": 3.954198473282443e-05, "loss": 0.4168, "step": 630 }, { "epoch": 1.76, "learning_rate": 3.928753180661578e-05, "loss": 0.4134, "step": 640 }, { "epoch": 1.78, "learning_rate": 3.903307888040713e-05, "loss": 0.4401, "step": 650 }, { "epoch": 1.81, "learning_rate": 3.8778625954198475e-05, "loss": 0.4113, "step": 660 }, { "epoch": 1.84, "learning_rate": 3.852417302798982e-05, "loss": 0.4051, "step": 670 }, { "epoch": 1.87, "learning_rate": 3.826972010178117e-05, "loss": 0.435, "step": 680 }, { "epoch": 1.89, "learning_rate": 3.801526717557252e-05, "loss": 0.4313, "step": 690 }, { "epoch": 1.92, "learning_rate": 3.776081424936387e-05, "loss": 0.4097, "step": 700 }, { "epoch": 1.95, "learning_rate": 3.750636132315522e-05, "loss": 0.4292, "step": 710 }, { "epoch": 1.98, "learning_rate": 3.7251908396946565e-05, "loss": 0.3968, "step": 720 }, { "epoch": 2.0, "eval_accuracy": 0.8713256134099927, "eval_loss": 0.31458979845046997, "eval_runtime": 148.1663, "eval_samples_per_second": 83.346, "eval_steps_per_second": 1.741, "step": 728 }, { "epoch": 2.01, "learning_rate": 3.699745547073791e-05, "loss": 0.4217, "step": 730 }, { "epoch": 2.03, "learning_rate": 3.674300254452927e-05, "loss": 0.404, "step": 740 }, { "epoch": 2.06, "learning_rate": 3.648854961832061e-05, "loss": 0.3986, "step": 750 }, { "epoch": 2.09, "learning_rate": 3.6234096692111965e-05, "loss": 0.3785, "step": 760 }, { "epoch": 2.12, "learning_rate": 3.5979643765903306e-05, "loss": 0.4032, "step": 770 }, { "epoch": 2.14, "learning_rate": 3.5725190839694654e-05, "loss": 0.3763, "step": 780 }, { "epoch": 2.17, "learning_rate": 3.547073791348601e-05, "loss": 0.3583, "step": 790 }, { "epoch": 2.2, "learning_rate": 3.521628498727735e-05, "loss": 0.4012, "step": 800 }, { "epoch": 2.22, "learning_rate": 3.4961832061068706e-05, "loss": 0.3448, "step": 810 }, { "epoch": 2.25, "learning_rate": 3.4707379134860054e-05, "loss": 0.398, "step": 820 }, { "epoch": 2.28, "learning_rate": 3.44529262086514e-05, "loss": 0.3806, "step": 830 }, { "epoch": 2.31, "learning_rate": 3.419847328244275e-05, "loss": 0.3674, "step": 840 }, { "epoch": 2.33, "learning_rate": 3.39440203562341e-05, "loss": 0.3559, "step": 850 }, { "epoch": 2.36, "learning_rate": 3.368956743002545e-05, "loss": 0.3825, "step": 860 }, { "epoch": 2.39, "learning_rate": 3.3435114503816796e-05, "loss": 0.375, "step": 870 }, { "epoch": 2.42, "learning_rate": 3.3180661577608144e-05, "loss": 0.3558, "step": 880 }, { "epoch": 2.44, "learning_rate": 3.292620865139949e-05, "loss": 0.3791, "step": 890 }, { "epoch": 2.47, "learning_rate": 3.267175572519084e-05, "loss": 0.3627, "step": 900 }, { "epoch": 2.5, "learning_rate": 3.241730279898219e-05, "loss": 0.3804, "step": 910 }, { "epoch": 2.53, "learning_rate": 3.216284987277354e-05, "loss": 0.3591, "step": 920 }, { "epoch": 2.55, "learning_rate": 3.190839694656489e-05, "loss": 0.3745, "step": 930 }, { "epoch": 2.58, "learning_rate": 3.1653944020356234e-05, "loss": 0.338, "step": 940 }, { "epoch": 2.61, "learning_rate": 3.139949109414758e-05, "loss": 0.343, "step": 950 }, { "epoch": 2.64, "learning_rate": 3.114503816793894e-05, "loss": 0.3537, "step": 960 }, { "epoch": 2.66, "learning_rate": 3.089058524173028e-05, "loss": 0.3347, "step": 970 }, { "epoch": 2.69, "learning_rate": 3.0636132315521633e-05, "loss": 0.3517, "step": 980 }, { "epoch": 2.72, "learning_rate": 3.038167938931298e-05, "loss": 0.336, "step": 990 }, { "epoch": 2.75, "learning_rate": 3.0127226463104323e-05, "loss": 0.3813, "step": 1000 }, { "epoch": 2.77, "learning_rate": 2.9872773536895675e-05, "loss": 0.3449, "step": 1010 }, { "epoch": 2.8, "learning_rate": 2.9618320610687023e-05, "loss": 0.3342, "step": 1020 }, { "epoch": 2.83, "learning_rate": 2.9363867684478375e-05, "loss": 0.3299, "step": 1030 }, { "epoch": 2.86, "learning_rate": 2.910941475826972e-05, "loss": 0.3376, "step": 1040 }, { "epoch": 2.88, "learning_rate": 2.885496183206107e-05, "loss": 0.3359, "step": 1050 }, { "epoch": 2.91, "learning_rate": 2.860050890585242e-05, "loss": 0.3321, "step": 1060 }, { "epoch": 2.94, "learning_rate": 2.8346055979643765e-05, "loss": 0.3126, "step": 1070 }, { "epoch": 2.97, "learning_rate": 2.8091603053435116e-05, "loss": 0.3352, "step": 1080 }, { "epoch": 2.99, "learning_rate": 2.7837150127226465e-05, "loss": 0.3075, "step": 1090 }, { "epoch": 3.0, "eval_accuracy": 0.901206575431209, "eval_loss": 0.24771326780319214, "eval_runtime": 151.6412, "eval_samples_per_second": 81.436, "eval_steps_per_second": 1.701, "step": 1092 }, { "epoch": 3.02, "learning_rate": 2.7582697201017816e-05, "loss": 0.3368, "step": 1100 }, { "epoch": 3.05, "learning_rate": 2.732824427480916e-05, "loss": 0.2966, "step": 1110 }, { "epoch": 3.08, "learning_rate": 2.707379134860051e-05, "loss": 0.332, "step": 1120 }, { "epoch": 3.1, "learning_rate": 2.681933842239186e-05, "loss": 0.3006, "step": 1130 }, { "epoch": 3.13, "learning_rate": 2.6564885496183206e-05, "loss": 0.2954, "step": 1140 }, { "epoch": 3.16, "learning_rate": 2.6310432569974558e-05, "loss": 0.3323, "step": 1150 }, { "epoch": 3.19, "learning_rate": 2.6055979643765906e-05, "loss": 0.3164, "step": 1160 }, { "epoch": 3.21, "learning_rate": 2.580152671755725e-05, "loss": 0.3114, "step": 1170 }, { "epoch": 3.24, "learning_rate": 2.5547073791348602e-05, "loss": 0.3139, "step": 1180 }, { "epoch": 3.27, "learning_rate": 2.5292620865139947e-05, "loss": 0.322, "step": 1190 }, { "epoch": 3.3, "learning_rate": 2.50381679389313e-05, "loss": 0.2801, "step": 1200 }, { "epoch": 3.32, "learning_rate": 2.4783715012722647e-05, "loss": 0.3153, "step": 1210 }, { "epoch": 3.35, "learning_rate": 2.4529262086513996e-05, "loss": 0.2958, "step": 1220 }, { "epoch": 3.38, "learning_rate": 2.4274809160305344e-05, "loss": 0.295, "step": 1230 }, { "epoch": 3.41, "learning_rate": 2.4020356234096695e-05, "loss": 0.2996, "step": 1240 }, { "epoch": 3.43, "learning_rate": 2.376590330788804e-05, "loss": 0.3155, "step": 1250 }, { "epoch": 3.46, "learning_rate": 2.351145038167939e-05, "loss": 0.3116, "step": 1260 }, { "epoch": 3.49, "learning_rate": 2.325699745547074e-05, "loss": 0.2996, "step": 1270 }, { "epoch": 3.52, "learning_rate": 2.300254452926209e-05, "loss": 0.3183, "step": 1280 }, { "epoch": 3.54, "learning_rate": 2.2748091603053437e-05, "loss": 0.2928, "step": 1290 }, { "epoch": 3.57, "learning_rate": 2.2493638676844785e-05, "loss": 0.3158, "step": 1300 }, { "epoch": 3.6, "learning_rate": 2.2239185750636133e-05, "loss": 0.2972, "step": 1310 }, { "epoch": 3.63, "learning_rate": 2.198473282442748e-05, "loss": 0.2891, "step": 1320 }, { "epoch": 3.65, "learning_rate": 2.173027989821883e-05, "loss": 0.2935, "step": 1330 }, { "epoch": 3.68, "learning_rate": 2.1475826972010178e-05, "loss": 0.2984, "step": 1340 }, { "epoch": 3.71, "learning_rate": 2.122137404580153e-05, "loss": 0.2867, "step": 1350 }, { "epoch": 3.74, "learning_rate": 2.0966921119592875e-05, "loss": 0.2849, "step": 1360 }, { "epoch": 3.76, "learning_rate": 2.0712468193384223e-05, "loss": 0.2478, "step": 1370 }, { "epoch": 3.79, "learning_rate": 2.0458015267175575e-05, "loss": 0.2982, "step": 1380 }, { "epoch": 3.82, "learning_rate": 2.0203562340966923e-05, "loss": 0.2891, "step": 1390 }, { "epoch": 3.84, "learning_rate": 1.994910941475827e-05, "loss": 0.3038, "step": 1400 }, { "epoch": 3.87, "learning_rate": 1.969465648854962e-05, "loss": 0.3008, "step": 1410 }, { "epoch": 3.9, "learning_rate": 1.9440203562340968e-05, "loss": 0.2778, "step": 1420 }, { "epoch": 3.93, "learning_rate": 1.9185750636132316e-05, "loss": 0.2744, "step": 1430 }, { "epoch": 3.95, "learning_rate": 1.8931297709923664e-05, "loss": 0.2691, "step": 1440 }, { "epoch": 3.98, "learning_rate": 1.8676844783715013e-05, "loss": 0.2961, "step": 1450 }, { "epoch": 4.0, "eval_accuracy": 0.9335168839582152, "eval_loss": 0.17737890779972076, "eval_runtime": 142.2521, "eval_samples_per_second": 86.811, "eval_steps_per_second": 1.814, "step": 1456 }, { "epoch": 4.01, "learning_rate": 1.8422391857506364e-05, "loss": 0.2954, "step": 1460 }, { "epoch": 4.04, "learning_rate": 1.816793893129771e-05, "loss": 0.2703, "step": 1470 }, { "epoch": 4.07, "learning_rate": 1.7913486005089058e-05, "loss": 0.2547, "step": 1480 }, { "epoch": 4.09, "learning_rate": 1.765903307888041e-05, "loss": 0.2583, "step": 1490 }, { "epoch": 4.12, "learning_rate": 1.7404580152671757e-05, "loss": 0.2686, "step": 1500 }, { "epoch": 4.15, "learning_rate": 1.7150127226463106e-05, "loss": 0.2774, "step": 1510 }, { "epoch": 4.18, "learning_rate": 1.6895674300254454e-05, "loss": 0.2708, "step": 1520 }, { "epoch": 4.2, "learning_rate": 1.6641221374045802e-05, "loss": 0.256, "step": 1530 }, { "epoch": 4.23, "learning_rate": 1.638676844783715e-05, "loss": 0.2766, "step": 1540 }, { "epoch": 4.26, "learning_rate": 1.61323155216285e-05, "loss": 0.263, "step": 1550 }, { "epoch": 4.29, "learning_rate": 1.5877862595419847e-05, "loss": 0.2478, "step": 1560 }, { "epoch": 4.31, "learning_rate": 1.56234096692112e-05, "loss": 0.2548, "step": 1570 }, { "epoch": 4.34, "learning_rate": 1.5368956743002547e-05, "loss": 0.2878, "step": 1580 }, { "epoch": 4.37, "learning_rate": 1.5114503816793894e-05, "loss": 0.2646, "step": 1590 }, { "epoch": 4.4, "learning_rate": 1.4860050890585242e-05, "loss": 0.2616, "step": 1600 }, { "epoch": 4.42, "learning_rate": 1.4605597964376592e-05, "loss": 0.2469, "step": 1610 }, { "epoch": 4.45, "learning_rate": 1.435114503816794e-05, "loss": 0.2539, "step": 1620 }, { "epoch": 4.48, "learning_rate": 1.4096692111959288e-05, "loss": 0.2536, "step": 1630 }, { "epoch": 4.5, "learning_rate": 1.3842239185750635e-05, "loss": 0.2763, "step": 1640 }, { "epoch": 4.53, "learning_rate": 1.3587786259541985e-05, "loss": 0.2681, "step": 1650 }, { "epoch": 4.56, "learning_rate": 1.3333333333333333e-05, "loss": 0.2618, "step": 1660 }, { "epoch": 4.59, "learning_rate": 1.3078880407124683e-05, "loss": 0.2802, "step": 1670 }, { "epoch": 4.61, "learning_rate": 1.2824427480916032e-05, "loss": 0.2462, "step": 1680 }, { "epoch": 4.64, "learning_rate": 1.2569974554707382e-05, "loss": 0.2621, "step": 1690 }, { "epoch": 4.67, "learning_rate": 1.2315521628498728e-05, "loss": 0.2587, "step": 1700 }, { "epoch": 4.7, "learning_rate": 1.2061068702290076e-05, "loss": 0.2762, "step": 1710 }, { "epoch": 4.72, "learning_rate": 1.1806615776081426e-05, "loss": 0.2635, "step": 1720 }, { "epoch": 4.75, "learning_rate": 1.1552162849872775e-05, "loss": 0.265, "step": 1730 }, { "epoch": 4.78, "learning_rate": 1.1297709923664123e-05, "loss": 0.2712, "step": 1740 }, { "epoch": 4.81, "learning_rate": 1.1043256997455471e-05, "loss": 0.2392, "step": 1750 }, { "epoch": 4.83, "learning_rate": 1.0788804071246821e-05, "loss": 0.263, "step": 1760 }, { "epoch": 4.86, "learning_rate": 1.0534351145038168e-05, "loss": 0.2365, "step": 1770 }, { "epoch": 4.89, "learning_rate": 1.0279898218829518e-05, "loss": 0.2504, "step": 1780 }, { "epoch": 4.92, "learning_rate": 1.0025445292620864e-05, "loss": 0.2498, "step": 1790 }, { "epoch": 4.94, "learning_rate": 9.770992366412214e-06, "loss": 0.2449, "step": 1800 }, { "epoch": 4.97, "learning_rate": 9.516539440203563e-06, "loss": 0.2531, "step": 1810 }, { "epoch": 5.0, "learning_rate": 9.26208651399491e-06, "loss": 0.2523, "step": 1820 }, { "epoch": 5.0, "eval_accuracy": 0.9421815531621993, "eval_loss": 0.15593743324279785, "eval_runtime": 137.5508, "eval_samples_per_second": 89.778, "eval_steps_per_second": 1.876, "step": 1820 }, { "epoch": 5.03, "learning_rate": 9.00763358778626e-06, "loss": 0.2562, "step": 1830 }, { "epoch": 5.05, "learning_rate": 8.753180661577609e-06, "loss": 0.22, "step": 1840 }, { "epoch": 5.08, "learning_rate": 8.498727735368957e-06, "loss": 0.2439, "step": 1850 }, { "epoch": 5.11, "learning_rate": 8.244274809160306e-06, "loss": 0.2483, "step": 1860 }, { "epoch": 5.14, "learning_rate": 7.989821882951656e-06, "loss": 0.2437, "step": 1870 }, { "epoch": 5.16, "learning_rate": 7.735368956743002e-06, "loss": 0.2555, "step": 1880 }, { "epoch": 5.19, "learning_rate": 7.480916030534352e-06, "loss": 0.2282, "step": 1890 }, { "epoch": 5.22, "learning_rate": 7.2264631043257e-06, "loss": 0.243, "step": 1900 }, { "epoch": 5.25, "learning_rate": 6.972010178117049e-06, "loss": 0.2138, "step": 1910 }, { "epoch": 5.27, "learning_rate": 6.717557251908398e-06, "loss": 0.2263, "step": 1920 }, { "epoch": 5.3, "learning_rate": 6.463104325699745e-06, "loss": 0.2305, "step": 1930 }, { "epoch": 5.33, "learning_rate": 6.208651399491094e-06, "loss": 0.2165, "step": 1940 }, { "epoch": 5.36, "learning_rate": 5.954198473282443e-06, "loss": 0.2327, "step": 1950 }, { "epoch": 5.38, "learning_rate": 5.699745547073792e-06, "loss": 0.253, "step": 1960 }, { "epoch": 5.41, "learning_rate": 5.44529262086514e-06, "loss": 0.2206, "step": 1970 }, { "epoch": 5.44, "learning_rate": 5.190839694656488e-06, "loss": 0.2571, "step": 1980 }, { "epoch": 5.47, "learning_rate": 4.9363867684478375e-06, "loss": 0.2242, "step": 1990 }, { "epoch": 5.49, "learning_rate": 4.681933842239187e-06, "loss": 0.2402, "step": 2000 }, { "epoch": 5.52, "learning_rate": 4.427480916030535e-06, "loss": 0.2462, "step": 2010 }, { "epoch": 5.55, "learning_rate": 4.173027989821883e-06, "loss": 0.217, "step": 2020 }, { "epoch": 5.58, "learning_rate": 3.918575063613232e-06, "loss": 0.2338, "step": 2030 }, { "epoch": 5.6, "learning_rate": 3.6641221374045806e-06, "loss": 0.2559, "step": 2040 }, { "epoch": 5.63, "learning_rate": 3.409669211195929e-06, "loss": 0.2408, "step": 2050 }, { "epoch": 5.66, "learning_rate": 3.155216284987277e-06, "loss": 0.2546, "step": 2060 }, { "epoch": 5.69, "learning_rate": 2.900763358778626e-06, "loss": 0.2327, "step": 2070 }, { "epoch": 5.71, "learning_rate": 2.646310432569975e-06, "loss": 0.2258, "step": 2080 }, { "epoch": 5.74, "learning_rate": 2.3918575063613232e-06, "loss": 0.2406, "step": 2090 }, { "epoch": 5.77, "learning_rate": 2.137404580152672e-06, "loss": 0.2117, "step": 2100 }, { "epoch": 5.8, "learning_rate": 1.8829516539440202e-06, "loss": 0.2513, "step": 2110 }, { "epoch": 5.82, "learning_rate": 1.6284987277353691e-06, "loss": 0.2285, "step": 2120 }, { "epoch": 5.85, "learning_rate": 1.3740458015267176e-06, "loss": 0.2422, "step": 2130 }, { "epoch": 5.88, "learning_rate": 1.119592875318066e-06, "loss": 0.2333, "step": 2140 }, { "epoch": 5.91, "learning_rate": 8.651399491094147e-07, "loss": 0.2206, "step": 2150 }, { "epoch": 5.93, "learning_rate": 6.106870229007634e-07, "loss": 0.2436, "step": 2160 }, { "epoch": 5.96, "learning_rate": 3.56234096692112e-07, "loss": 0.22, "step": 2170 }, { "epoch": 5.99, "learning_rate": 1.0178117048346057e-07, "loss": 0.2304, "step": 2180 }, { "epoch": 6.0, "eval_accuracy": 0.9481739412098146, "eval_loss": 0.14020109176635742, "eval_runtime": 136.7445, "eval_samples_per_second": 90.307, "eval_steps_per_second": 1.887, "step": 2184 }, { "epoch": 6.0, "step": 2184, "total_flos": 2.0946151345503928e+18, "train_loss": 0.3889721456519413, "train_runtime": 6320.4879, "train_samples_per_second": 66.424, "train_steps_per_second": 0.346 } ], "max_steps": 2184, "num_train_epochs": 6, "total_flos": 2.0946151345503928e+18, "trial_name": null, "trial_params": null }