|
{ |
|
"best_metric": 0.9448669201520913, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-blank_img/checkpoint-222", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 740, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3783783783783788e-06, |
|
"loss": 0.6924, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 6.7567567567567575e-06, |
|
"loss": 0.6582, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.0135135135135136e-05, |
|
"loss": 0.5654, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 0.4386, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.6891891891891892e-05, |
|
"loss": 0.3558, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.0270270270270273e-05, |
|
"loss": 0.2918, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.364864864864865e-05, |
|
"loss": 0.3143, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.702702702702703e-05, |
|
"loss": 0.2339, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.0405405405405407e-05, |
|
"loss": 0.2426, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.3783783783783784e-05, |
|
"loss": 0.2386, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7162162162162165e-05, |
|
"loss": 0.212, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0540540540540545e-05, |
|
"loss": 0.2642, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.391891891891892e-05, |
|
"loss": 0.2034, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.72972972972973e-05, |
|
"loss": 0.2646, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9391634980988594, |
|
"eval_loss": 0.19738444685935974, |
|
"eval_runtime": 6.6287, |
|
"eval_samples_per_second": 158.704, |
|
"eval_steps_per_second": 4.978, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.9924924924924924e-05, |
|
"loss": 0.2718, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.954954954954955e-05, |
|
"loss": 0.2249, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.9174174174174175e-05, |
|
"loss": 0.2091, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.87987987987988e-05, |
|
"loss": 0.2049, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.8423423423423426e-05, |
|
"loss": 0.2384, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.804804804804805e-05, |
|
"loss": 0.2021, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.767267267267268e-05, |
|
"loss": 0.1791, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.72972972972973e-05, |
|
"loss": 0.2404, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.692192192192192e-05, |
|
"loss": 0.2547, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.654654654654655e-05, |
|
"loss": 0.2253, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.617117117117117e-05, |
|
"loss": 0.2445, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.57957957957958e-05, |
|
"loss": 0.2609, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.5420420420420424e-05, |
|
"loss": 0.2177, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.5045045045045046e-05, |
|
"loss": 0.229, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.4669669669669675e-05, |
|
"loss": 0.2287, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.935361216730038, |
|
"eval_loss": 0.19786785542964935, |
|
"eval_runtime": 7.1255, |
|
"eval_samples_per_second": 147.638, |
|
"eval_steps_per_second": 4.631, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.42942942942943e-05, |
|
"loss": 0.2053, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.391891891891892e-05, |
|
"loss": 0.1898, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.354354354354355e-05, |
|
"loss": 0.1946, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 4.316816816816817e-05, |
|
"loss": 0.2265, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.27927927927928e-05, |
|
"loss": 0.2404, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.241741741741742e-05, |
|
"loss": 0.2073, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.204204204204204e-05, |
|
"loss": 0.2608, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.2168, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.1291291291291294e-05, |
|
"loss": 0.2091, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 4.091591591591592e-05, |
|
"loss": 0.2468, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.0540540540540545e-05, |
|
"loss": 0.2181, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.016516516516517e-05, |
|
"loss": 0.1717, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.9789789789789796e-05, |
|
"loss": 0.205, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.941441441441442e-05, |
|
"loss": 0.1944, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.903903903903904e-05, |
|
"loss": 0.198, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9448669201520913, |
|
"eval_loss": 0.17274008691310883, |
|
"eval_runtime": 7.4744, |
|
"eval_samples_per_second": 140.747, |
|
"eval_steps_per_second": 4.415, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.866366366366366e-05, |
|
"loss": 0.2083, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.8288288288288285e-05, |
|
"loss": 0.1868, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.7912912912912914e-05, |
|
"loss": 0.23, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.7537537537537536e-05, |
|
"loss": 0.2083, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.7162162162162165e-05, |
|
"loss": 0.1851, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.678678678678679e-05, |
|
"loss": 0.2355, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.641141141141141e-05, |
|
"loss": 0.2357, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.603603603603604e-05, |
|
"loss": 0.1995, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.566066066066066e-05, |
|
"loss": 0.205, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.528528528528528e-05, |
|
"loss": 0.247, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 3.490990990990991e-05, |
|
"loss": 0.1991, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 3.453453453453453e-05, |
|
"loss": 0.1895, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 3.415915915915916e-05, |
|
"loss": 0.2351, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 3.3783783783783784e-05, |
|
"loss": 0.2073, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 3.3408408408408406e-05, |
|
"loss": 0.1889, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9429657794676806, |
|
"eval_loss": 0.174727663397789, |
|
"eval_runtime": 7.3243, |
|
"eval_samples_per_second": 143.632, |
|
"eval_steps_per_second": 4.506, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.3033033033033035e-05, |
|
"loss": 0.1688, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.265765765765766e-05, |
|
"loss": 0.1977, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.2282282282282286e-05, |
|
"loss": 0.198, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 3.190690690690691e-05, |
|
"loss": 0.2217, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 3.153153153153153e-05, |
|
"loss": 0.1959, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 3.115615615615616e-05, |
|
"loss": 0.2229, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 3.078078078078078e-05, |
|
"loss": 0.1919, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 3.0405405405405407e-05, |
|
"loss": 0.2318, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 3.0030030030030033e-05, |
|
"loss": 0.214, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 2.9654654654654658e-05, |
|
"loss": 0.1875, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.927927927927928e-05, |
|
"loss": 0.2289, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.8903903903903906e-05, |
|
"loss": 0.1887, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.852852852852853e-05, |
|
"loss": 0.1918, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.8153153153153157e-05, |
|
"loss": 0.1691, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.223, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9448669201520913, |
|
"eval_loss": 0.17105169594287872, |
|
"eval_runtime": 6.7978, |
|
"eval_samples_per_second": 154.755, |
|
"eval_steps_per_second": 4.854, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.7402402402402405e-05, |
|
"loss": 0.1872, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 2.702702702702703e-05, |
|
"loss": 0.2184, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 2.6651651651651656e-05, |
|
"loss": 0.2066, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 2.6276276276276278e-05, |
|
"loss": 0.181, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 2.5900900900900903e-05, |
|
"loss": 0.1743, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 2.552552552552553e-05, |
|
"loss": 0.188, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 2.5150150150150154e-05, |
|
"loss": 0.2181, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 2.4774774774774777e-05, |
|
"loss": 0.2173, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 2.43993993993994e-05, |
|
"loss": 0.2207, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 2.4024024024024024e-05, |
|
"loss": 0.2025, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 2.364864864864865e-05, |
|
"loss": 0.1718, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 2.3273273273273275e-05, |
|
"loss": 0.189, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 2.28978978978979e-05, |
|
"loss": 0.1891, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 2.2522522522522523e-05, |
|
"loss": 0.1771, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.938212927756654, |
|
"eval_loss": 0.16966551542282104, |
|
"eval_runtime": 6.7174, |
|
"eval_samples_per_second": 156.609, |
|
"eval_steps_per_second": 4.913, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 2.214714714714715e-05, |
|
"loss": 0.212, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 2.1771771771771774e-05, |
|
"loss": 0.193, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 2.13963963963964e-05, |
|
"loss": 0.2115, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 2.102102102102102e-05, |
|
"loss": 0.2092, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 2.0645645645645647e-05, |
|
"loss": 0.1678, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 2.0270270270270273e-05, |
|
"loss": 0.1898, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 1.9894894894894898e-05, |
|
"loss": 0.1948, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 1.951951951951952e-05, |
|
"loss": 0.2115, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.9144144144144142e-05, |
|
"loss": 0.1735, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 1.8768768768768768e-05, |
|
"loss": 0.1791, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 1.8393393393393393e-05, |
|
"loss": 0.1728, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 1.801801801801802e-05, |
|
"loss": 0.1969, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 1.764264264264264e-05, |
|
"loss": 0.2058, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 1.7267267267267267e-05, |
|
"loss": 0.1762, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 1.6891891891891892e-05, |
|
"loss": 0.1864, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9391634980988594, |
|
"eval_loss": 0.16719643771648407, |
|
"eval_runtime": 7.3392, |
|
"eval_samples_per_second": 143.339, |
|
"eval_steps_per_second": 4.496, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.6516516516516518e-05, |
|
"loss": 0.2046, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.6141141141141143e-05, |
|
"loss": 0.1794, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 1.5765765765765765e-05, |
|
"loss": 0.1919, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 1.539039039039039e-05, |
|
"loss": 0.187, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.5015015015015016e-05, |
|
"loss": 0.1637, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 1.463963963963964e-05, |
|
"loss": 0.1572, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 1.4264264264264266e-05, |
|
"loss": 0.1708, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.1963, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 0.1977, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 1.3138138138138139e-05, |
|
"loss": 0.1751, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 1.2762762762762764e-05, |
|
"loss": 0.1707, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 1.2387387387387388e-05, |
|
"loss": 0.1561, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 1.2012012012012012e-05, |
|
"loss": 0.1918, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 1.1636636636636638e-05, |
|
"loss": 0.2219, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 1.1261261261261261e-05, |
|
"loss": 0.1716, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9429657794676806, |
|
"eval_loss": 0.18012098968029022, |
|
"eval_runtime": 7.5217, |
|
"eval_samples_per_second": 139.861, |
|
"eval_steps_per_second": 4.387, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 1.0885885885885887e-05, |
|
"loss": 0.1603, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 1.051051051051051e-05, |
|
"loss": 0.1871, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 1.0135135135135136e-05, |
|
"loss": 0.1972, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 9.75975975975976e-06, |
|
"loss": 0.223, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 9.384384384384384e-06, |
|
"loss": 0.1672, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 9.00900900900901e-06, |
|
"loss": 0.1529, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 8.633633633633633e-06, |
|
"loss": 0.1701, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 8.258258258258259e-06, |
|
"loss": 0.1925, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 7.882882882882883e-06, |
|
"loss": 0.1448, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 7.507507507507508e-06, |
|
"loss": 0.181, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 7.132132132132133e-06, |
|
"loss": 0.1984, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 6.7567567567567575e-06, |
|
"loss": 0.203, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 6.381381381381382e-06, |
|
"loss": 0.1554, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 6.006006006006006e-06, |
|
"loss": 0.1743, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 5.630630630630631e-06, |
|
"loss": 0.192, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.94106463878327, |
|
"eval_loss": 0.17536889016628265, |
|
"eval_runtime": 6.7103, |
|
"eval_samples_per_second": 156.774, |
|
"eval_steps_per_second": 4.918, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 5.255255255255255e-06, |
|
"loss": 0.1613, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 4.87987987987988e-06, |
|
"loss": 0.1746, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 4.504504504504505e-06, |
|
"loss": 0.1584, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 4.129129129129129e-06, |
|
"loss": 0.1849, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 3.753753753753754e-06, |
|
"loss": 0.1871, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 3.3783783783783788e-06, |
|
"loss": 0.1302, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 3.003003003003003e-06, |
|
"loss": 0.1586, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 2.6276276276276277e-06, |
|
"loss": 0.2135, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 2.2522522522522524e-06, |
|
"loss": 0.1994, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 1.876876876876877e-06, |
|
"loss": 0.1693, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 1.5015015015015015e-06, |
|
"loss": 0.2072, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 1.1261261261261262e-06, |
|
"loss": 0.1806, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 7.507507507507508e-07, |
|
"loss": 0.1692, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 3.753753753753754e-07, |
|
"loss": 0.1998, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.1886, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9420152091254753, |
|
"eval_loss": 0.17660604417324066, |
|
"eval_runtime": 6.7015, |
|
"eval_samples_per_second": 156.979, |
|
"eval_steps_per_second": 4.924, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 740, |
|
"total_flos": 2.3513765580619776e+18, |
|
"train_loss": 0.21299834186966354, |
|
"train_runtime": 1451.9061, |
|
"train_samples_per_second": 65.156, |
|
"train_steps_per_second": 0.51 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 740, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2.3513765580619776e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|