{ "best_metric": 0.7649769585253456, "best_model_checkpoint": "videomae-base-finetuned-subset\\checkpoint-2016", "epoch": 59.00780780780781, "eval_steps": 500, "global_step": 6660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 7.507507507507508e-07, "loss": 1.7044, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.5015015015015015e-06, "loss": 1.6581, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.2522522522522524e-06, "loss": 1.5572, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.003003003003003e-06, "loss": 1.6749, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.753753753753754e-06, "loss": 1.6511, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.504504504504505e-06, "loss": 1.6406, "step": 60 }, { "epoch": 0.01, "learning_rate": 5.255255255255255e-06, "loss": 1.6205, "step": 70 }, { "epoch": 0.01, "learning_rate": 6.006006006006006e-06, "loss": 1.707, "step": 80 }, { "epoch": 0.01, "learning_rate": 6.7567567567567575e-06, "loss": 1.5589, "step": 90 }, { "epoch": 0.02, "learning_rate": 7.507507507507508e-06, "loss": 1.6865, "step": 100 }, { "epoch": 0.02, "learning_rate": 8.258258258258259e-06, "loss": 1.6074, "step": 110 }, { "epoch": 0.02, "eval_accuracy": 0.3686635944700461, "eval_loss": 1.5690008401870728, "eval_runtime": 253.0884, "eval_samples_per_second": 0.857, "eval_steps_per_second": 0.217, "step": 112 }, { "epoch": 1.0, "learning_rate": 9.00900900900901e-06, "loss": 1.5636, "step": 120 }, { "epoch": 1.0, "learning_rate": 9.75975975975976e-06, "loss": 1.6139, "step": 130 }, { "epoch": 1.0, "learning_rate": 1.051051051051051e-05, "loss": 1.591, "step": 140 }, { "epoch": 1.01, "learning_rate": 1.1261261261261261e-05, "loss": 1.5824, "step": 150 }, { "epoch": 1.01, "learning_rate": 1.2012012012012012e-05, "loss": 1.6132, "step": 160 }, { "epoch": 1.01, "learning_rate": 1.2762762762762764e-05, "loss": 1.6279, "step": 170 }, { "epoch": 1.01, "learning_rate": 1.3513513513513515e-05, "loss": 1.6001, "step": 180 }, { "epoch": 1.01, "learning_rate": 1.4264264264264266e-05, "loss": 1.5873, "step": 190 }, { "epoch": 1.01, "learning_rate": 1.5015015015015016e-05, "loss": 1.59, "step": 200 }, { "epoch": 1.01, "learning_rate": 1.5765765765765765e-05, "loss": 1.5691, "step": 210 }, { "epoch": 1.02, "learning_rate": 1.6516516516516518e-05, "loss": 1.6001, "step": 220 }, { "epoch": 1.02, "eval_accuracy": 0.30414746543778803, "eval_loss": 1.5782501697540283, "eval_runtime": 245.0114, "eval_samples_per_second": 0.886, "eval_steps_per_second": 0.224, "step": 224 }, { "epoch": 2.0, "learning_rate": 1.7267267267267267e-05, "loss": 1.5771, "step": 230 }, { "epoch": 2.0, "learning_rate": 1.801801801801802e-05, "loss": 1.5651, "step": 240 }, { "epoch": 2.0, "learning_rate": 1.8768768768768768e-05, "loss": 1.4911, "step": 250 }, { "epoch": 2.01, "learning_rate": 1.951951951951952e-05, "loss": 1.6414, "step": 260 }, { "epoch": 2.01, "learning_rate": 2.0270270270270273e-05, "loss": 1.5454, "step": 270 }, { "epoch": 2.01, "learning_rate": 2.102102102102102e-05, "loss": 1.4563, "step": 280 }, { "epoch": 2.01, "learning_rate": 2.1771771771771774e-05, "loss": 1.4325, "step": 290 }, { "epoch": 2.01, "learning_rate": 2.2522522522522523e-05, "loss": 1.4738, "step": 300 }, { "epoch": 2.01, "learning_rate": 2.3273273273273275e-05, "loss": 1.3045, "step": 310 }, { "epoch": 2.01, "learning_rate": 2.4024024024024024e-05, "loss": 1.2273, "step": 320 }, { "epoch": 2.02, "learning_rate": 2.4774774774774777e-05, "loss": 1.4193, "step": 330 }, { "epoch": 2.02, "eval_accuracy": 0.3824884792626728, "eval_loss": 1.4873727560043335, "eval_runtime": 251.8516, "eval_samples_per_second": 0.862, "eval_steps_per_second": 0.218, "step": 336 }, { "epoch": 3.0, "learning_rate": 2.552552552552553e-05, "loss": 1.5274, "step": 340 }, { "epoch": 3.0, "learning_rate": 2.6276276276276278e-05, "loss": 1.3287, "step": 350 }, { "epoch": 3.0, "learning_rate": 2.702702702702703e-05, "loss": 1.4428, "step": 360 }, { "epoch": 3.01, "learning_rate": 2.777777777777778e-05, "loss": 1.2924, "step": 370 }, { "epoch": 3.01, "learning_rate": 2.852852852852853e-05, "loss": 1.204, "step": 380 }, { "epoch": 3.01, "learning_rate": 2.927927927927928e-05, "loss": 1.1626, "step": 390 }, { "epoch": 3.01, "learning_rate": 3.0030030030030033e-05, "loss": 1.5428, "step": 400 }, { "epoch": 3.01, "learning_rate": 3.078078078078078e-05, "loss": 1.1494, "step": 410 }, { "epoch": 3.01, "learning_rate": 3.153153153153153e-05, "loss": 1.2899, "step": 420 }, { "epoch": 3.01, "learning_rate": 3.2282282282282286e-05, "loss": 1.4876, "step": 430 }, { "epoch": 3.02, "learning_rate": 3.3033033033033035e-05, "loss": 1.398, "step": 440 }, { "epoch": 3.02, "eval_accuracy": 0.6405529953917051, "eval_loss": 1.0196824073791504, "eval_runtime": 247.8505, "eval_samples_per_second": 0.876, "eval_steps_per_second": 0.222, "step": 448 }, { "epoch": 4.0, "learning_rate": 3.3783783783783784e-05, "loss": 1.3202, "step": 450 }, { "epoch": 4.0, "learning_rate": 3.453453453453453e-05, "loss": 1.145, "step": 460 }, { "epoch": 4.0, "learning_rate": 3.528528528528528e-05, "loss": 1.2636, "step": 470 }, { "epoch": 4.0, "learning_rate": 3.603603603603604e-05, "loss": 1.2266, "step": 480 }, { "epoch": 4.01, "learning_rate": 3.678678678678679e-05, "loss": 1.6096, "step": 490 }, { "epoch": 4.01, "learning_rate": 3.7537537537537536e-05, "loss": 1.1474, "step": 500 }, { "epoch": 4.01, "learning_rate": 3.8288288288288285e-05, "loss": 1.1879, "step": 510 }, { "epoch": 4.01, "learning_rate": 3.903903903903904e-05, "loss": 1.0165, "step": 520 }, { "epoch": 4.01, "learning_rate": 3.9789789789789796e-05, "loss": 1.2291, "step": 530 }, { "epoch": 4.01, "learning_rate": 4.0540540540540545e-05, "loss": 1.2309, "step": 540 }, { "epoch": 4.02, "learning_rate": 4.1291291291291294e-05, "loss": 1.0638, "step": 550 }, { "epoch": 4.02, "learning_rate": 4.204204204204204e-05, "loss": 1.2217, "step": 560 }, { "epoch": 4.02, "eval_accuracy": 0.391705069124424, "eval_loss": 1.3385610580444336, "eval_runtime": 242.8726, "eval_samples_per_second": 0.893, "eval_steps_per_second": 0.226, "step": 560 }, { "epoch": 5.0, "learning_rate": 4.27927927927928e-05, "loss": 1.4069, "step": 570 }, { "epoch": 5.0, "learning_rate": 4.354354354354355e-05, "loss": 1.26, "step": 580 }, { "epoch": 5.0, "learning_rate": 4.42942942942943e-05, "loss": 1.2424, "step": 590 }, { "epoch": 5.01, "learning_rate": 4.5045045045045046e-05, "loss": 1.2978, "step": 600 }, { "epoch": 5.01, "learning_rate": 4.57957957957958e-05, "loss": 1.373, "step": 610 }, { "epoch": 5.01, "learning_rate": 4.654654654654655e-05, "loss": 1.2995, "step": 620 }, { "epoch": 5.01, "learning_rate": 4.72972972972973e-05, "loss": 1.053, "step": 630 }, { "epoch": 5.01, "learning_rate": 4.804804804804805e-05, "loss": 1.1977, "step": 640 }, { "epoch": 5.01, "learning_rate": 4.87987987987988e-05, "loss": 1.2698, "step": 650 }, { "epoch": 5.02, "learning_rate": 4.954954954954955e-05, "loss": 1.1812, "step": 660 }, { "epoch": 5.02, "learning_rate": 4.9966633299966636e-05, "loss": 1.2577, "step": 670 }, { "epoch": 5.02, "eval_accuracy": 0.5391705069124424, "eval_loss": 1.2195504903793335, "eval_runtime": 264.2835, "eval_samples_per_second": 0.821, "eval_steps_per_second": 0.208, "step": 672 }, { "epoch": 6.0, "learning_rate": 4.988321654988322e-05, "loss": 1.4038, "step": 680 }, { "epoch": 6.0, "learning_rate": 4.97997997997998e-05, "loss": 1.4382, "step": 690 }, { "epoch": 6.0, "learning_rate": 4.9716383049716386e-05, "loss": 1.3709, "step": 700 }, { "epoch": 6.01, "learning_rate": 4.963296629963297e-05, "loss": 1.0241, "step": 710 }, { "epoch": 6.01, "learning_rate": 4.954954954954955e-05, "loss": 1.5423, "step": 720 }, { "epoch": 6.01, "learning_rate": 4.9466132799466136e-05, "loss": 1.2711, "step": 730 }, { "epoch": 6.01, "learning_rate": 4.938271604938271e-05, "loss": 1.4259, "step": 740 }, { "epoch": 6.01, "learning_rate": 4.92992992992993e-05, "loss": 1.2684, "step": 750 }, { "epoch": 6.01, "learning_rate": 4.921588254921589e-05, "loss": 0.9547, "step": 760 }, { "epoch": 6.01, "learning_rate": 4.9132465799132463e-05, "loss": 1.1885, "step": 770 }, { "epoch": 6.02, "learning_rate": 4.9049049049049054e-05, "loss": 1.0121, "step": 780 }, { "epoch": 6.02, "eval_accuracy": 0.4792626728110599, "eval_loss": 1.2319154739379883, "eval_runtime": 261.8591, "eval_samples_per_second": 0.829, "eval_steps_per_second": 0.21, "step": 784 }, { "epoch": 7.0, "learning_rate": 4.896563229896564e-05, "loss": 1.355, "step": 790 }, { "epoch": 7.0, "learning_rate": 4.8882215548882214e-05, "loss": 1.4279, "step": 800 }, { "epoch": 7.0, "learning_rate": 4.87987987987988e-05, "loss": 1.0318, "step": 810 }, { "epoch": 7.01, "learning_rate": 4.871538204871539e-05, "loss": 1.1375, "step": 820 }, { "epoch": 7.01, "learning_rate": 4.8631965298631964e-05, "loss": 1.1721, "step": 830 }, { "epoch": 7.01, "learning_rate": 4.854854854854855e-05, "loss": 1.2527, "step": 840 }, { "epoch": 7.01, "learning_rate": 4.846513179846514e-05, "loss": 1.0756, "step": 850 }, { "epoch": 7.01, "learning_rate": 4.8381715048381715e-05, "loss": 1.1513, "step": 860 }, { "epoch": 7.01, "learning_rate": 4.82982982982983e-05, "loss": 1.0789, "step": 870 }, { "epoch": 7.01, "learning_rate": 4.821488154821489e-05, "loss": 1.2463, "step": 880 }, { "epoch": 7.02, "learning_rate": 4.8131464798131465e-05, "loss": 1.2485, "step": 890 }, { "epoch": 7.02, "eval_accuracy": 0.7511520737327189, "eval_loss": 0.8229795098304749, "eval_runtime": 252.4438, "eval_samples_per_second": 0.86, "eval_steps_per_second": 0.218, "step": 896 }, { "epoch": 8.0, "learning_rate": 4.804804804804805e-05, "loss": 0.9738, "step": 900 }, { "epoch": 8.0, "learning_rate": 4.796463129796463e-05, "loss": 0.7946, "step": 910 }, { "epoch": 8.0, "learning_rate": 4.7881214547881215e-05, "loss": 1.2819, "step": 920 }, { "epoch": 8.01, "learning_rate": 4.77977977977978e-05, "loss": 1.7354, "step": 930 }, { "epoch": 8.01, "learning_rate": 4.771438104771438e-05, "loss": 1.161, "step": 940 }, { "epoch": 8.01, "learning_rate": 4.7630964297630966e-05, "loss": 1.3311, "step": 950 }, { "epoch": 8.01, "learning_rate": 4.754754754754755e-05, "loss": 0.9299, "step": 960 }, { "epoch": 8.01, "learning_rate": 4.746413079746413e-05, "loss": 0.9622, "step": 970 }, { "epoch": 8.01, "learning_rate": 4.7380714047380716e-05, "loss": 0.9557, "step": 980 }, { "epoch": 8.01, "learning_rate": 4.72972972972973e-05, "loss": 0.9241, "step": 990 }, { "epoch": 8.02, "learning_rate": 4.721388054721388e-05, "loss": 1.025, "step": 1000 }, { "epoch": 8.02, "eval_accuracy": 0.6866359447004609, "eval_loss": 0.8022767305374146, "eval_runtime": 256.9331, "eval_samples_per_second": 0.845, "eval_steps_per_second": 0.214, "step": 1008 }, { "epoch": 9.0, "learning_rate": 4.7130463797130466e-05, "loss": 1.6034, "step": 1010 }, { "epoch": 9.0, "learning_rate": 4.704704704704705e-05, "loss": 1.0115, "step": 1020 }, { "epoch": 9.0, "learning_rate": 4.696363029696363e-05, "loss": 1.337, "step": 1030 }, { "epoch": 9.0, "learning_rate": 4.688021354688022e-05, "loss": 1.351, "step": 1040 }, { "epoch": 9.01, "learning_rate": 4.67967967967968e-05, "loss": 1.0992, "step": 1050 }, { "epoch": 9.01, "learning_rate": 4.6713380046713384e-05, "loss": 1.0828, "step": 1060 }, { "epoch": 9.01, "learning_rate": 4.662996329662997e-05, "loss": 1.0086, "step": 1070 }, { "epoch": 9.01, "learning_rate": 4.654654654654655e-05, "loss": 0.8658, "step": 1080 }, { "epoch": 9.01, "learning_rate": 4.6463129796463134e-05, "loss": 0.7355, "step": 1090 }, { "epoch": 9.01, "learning_rate": 4.637971304637971e-05, "loss": 1.0191, "step": 1100 }, { "epoch": 9.02, "learning_rate": 4.62962962962963e-05, "loss": 1.0171, "step": 1110 }, { "epoch": 9.02, "learning_rate": 4.6212879546212884e-05, "loss": 1.2952, "step": 1120 }, { "epoch": 9.02, "eval_accuracy": 0.6036866359447005, "eval_loss": 0.9130178093910217, "eval_runtime": 256.4943, "eval_samples_per_second": 0.846, "eval_steps_per_second": 0.214, "step": 1120 }, { "epoch": 10.0, "learning_rate": 4.612946279612946e-05, "loss": 0.8975, "step": 1130 }, { "epoch": 10.0, "learning_rate": 4.604604604604605e-05, "loss": 1.0215, "step": 1140 }, { "epoch": 10.0, "learning_rate": 4.5962629295962635e-05, "loss": 0.9562, "step": 1150 }, { "epoch": 10.01, "learning_rate": 4.587921254587921e-05, "loss": 1.2006, "step": 1160 }, { "epoch": 10.01, "learning_rate": 4.57957957957958e-05, "loss": 1.2811, "step": 1170 }, { "epoch": 10.01, "learning_rate": 4.5712379045712385e-05, "loss": 1.0057, "step": 1180 }, { "epoch": 10.01, "learning_rate": 4.562896229562896e-05, "loss": 1.0403, "step": 1190 }, { "epoch": 10.01, "learning_rate": 4.5545545545545545e-05, "loss": 0.8273, "step": 1200 }, { "epoch": 10.01, "learning_rate": 4.5462128795462135e-05, "loss": 0.7807, "step": 1210 }, { "epoch": 10.02, "learning_rate": 4.537871204537871e-05, "loss": 0.9059, "step": 1220 }, { "epoch": 10.02, "learning_rate": 4.5295295295295295e-05, "loss": 0.9499, "step": 1230 }, { "epoch": 10.02, "eval_accuracy": 0.6036866359447005, "eval_loss": 1.0620718002319336, "eval_runtime": 255.1418, "eval_samples_per_second": 0.851, "eval_steps_per_second": 0.216, "step": 1232 }, { "epoch": 11.0, "learning_rate": 4.5211878545211886e-05, "loss": 1.0751, "step": 1240 }, { "epoch": 11.0, "learning_rate": 4.512846179512846e-05, "loss": 1.034, "step": 1250 }, { "epoch": 11.0, "learning_rate": 4.5045045045045046e-05, "loss": 0.6693, "step": 1260 }, { "epoch": 11.01, "learning_rate": 4.4961628294961636e-05, "loss": 1.0748, "step": 1270 }, { "epoch": 11.01, "learning_rate": 4.487821154487821e-05, "loss": 0.9789, "step": 1280 }, { "epoch": 11.01, "learning_rate": 4.4794794794794796e-05, "loss": 0.8083, "step": 1290 }, { "epoch": 11.01, "learning_rate": 4.471137804471138e-05, "loss": 0.9976, "step": 1300 }, { "epoch": 11.01, "learning_rate": 4.462796129462796e-05, "loss": 0.7674, "step": 1310 }, { "epoch": 11.01, "learning_rate": 4.4544544544544546e-05, "loss": 0.8705, "step": 1320 }, { "epoch": 11.01, "learning_rate": 4.446112779446113e-05, "loss": 1.195, "step": 1330 }, { "epoch": 11.02, "learning_rate": 4.437771104437771e-05, "loss": 0.8805, "step": 1340 }, { "epoch": 11.02, "eval_accuracy": 0.7050691244239631, "eval_loss": 0.871336042881012, "eval_runtime": 256.7263, "eval_samples_per_second": 0.845, "eval_steps_per_second": 0.214, "step": 1344 }, { "epoch": 12.0, "learning_rate": 4.42942942942943e-05, "loss": 0.8859, "step": 1350 }, { "epoch": 12.0, "learning_rate": 4.421087754421088e-05, "loss": 1.2024, "step": 1360 }, { "epoch": 12.0, "learning_rate": 4.4127460794127464e-05, "loss": 0.9896, "step": 1370 }, { "epoch": 12.01, "learning_rate": 4.404404404404405e-05, "loss": 0.9208, "step": 1380 }, { "epoch": 12.01, "learning_rate": 4.3960627293960624e-05, "loss": 1.2375, "step": 1390 }, { "epoch": 12.01, "learning_rate": 4.3877210543877214e-05, "loss": 0.9493, "step": 1400 }, { "epoch": 12.01, "learning_rate": 4.37937937937938e-05, "loss": 0.9476, "step": 1410 }, { "epoch": 12.01, "learning_rate": 4.3710377043710374e-05, "loss": 0.8137, "step": 1420 }, { "epoch": 12.01, "learning_rate": 4.3626960293626964e-05, "loss": 0.854, "step": 1430 }, { "epoch": 12.01, "learning_rate": 4.354354354354355e-05, "loss": 1.0682, "step": 1440 }, { "epoch": 12.02, "learning_rate": 4.3460126793460125e-05, "loss": 1.2066, "step": 1450 }, { "epoch": 12.02, "eval_accuracy": 0.5852534562211982, "eval_loss": 0.9363781809806824, "eval_runtime": 263.3253, "eval_samples_per_second": 0.824, "eval_steps_per_second": 0.209, "step": 1456 }, { "epoch": 13.0, "learning_rate": 4.3376710043376715e-05, "loss": 0.7223, "step": 1460 }, { "epoch": 13.0, "learning_rate": 4.32932932932933e-05, "loss": 0.7549, "step": 1470 }, { "epoch": 13.0, "learning_rate": 4.3209876543209875e-05, "loss": 0.9539, "step": 1480 }, { "epoch": 13.01, "learning_rate": 4.312645979312646e-05, "loss": 0.8501, "step": 1490 }, { "epoch": 13.01, "learning_rate": 4.304304304304305e-05, "loss": 0.9684, "step": 1500 }, { "epoch": 13.01, "learning_rate": 4.2959626292959625e-05, "loss": 1.3551, "step": 1510 }, { "epoch": 13.01, "learning_rate": 4.287620954287621e-05, "loss": 0.9607, "step": 1520 }, { "epoch": 13.01, "learning_rate": 4.27927927927928e-05, "loss": 0.8907, "step": 1530 }, { "epoch": 13.01, "learning_rate": 4.2709376042709376e-05, "loss": 1.2041, "step": 1540 }, { "epoch": 13.01, "learning_rate": 4.262595929262596e-05, "loss": 0.7261, "step": 1550 }, { "epoch": 13.02, "learning_rate": 4.254254254254255e-05, "loss": 0.9358, "step": 1560 }, { "epoch": 13.02, "eval_accuracy": 0.5852534562211982, "eval_loss": 0.910686731338501, "eval_runtime": 265.3055, "eval_samples_per_second": 0.818, "eval_steps_per_second": 0.207, "step": 1568 }, { "epoch": 14.0, "learning_rate": 4.2459125792459126e-05, "loss": 0.9092, "step": 1570 }, { "epoch": 14.0, "learning_rate": 4.237570904237571e-05, "loss": 0.8376, "step": 1580 }, { "epoch": 14.0, "learning_rate": 4.229229229229229e-05, "loss": 1.0121, "step": 1590 }, { "epoch": 14.0, "learning_rate": 4.2208875542208876e-05, "loss": 0.9361, "step": 1600 }, { "epoch": 14.01, "learning_rate": 4.212545879212546e-05, "loss": 1.053, "step": 1610 }, { "epoch": 14.01, "learning_rate": 4.204204204204204e-05, "loss": 0.9652, "step": 1620 }, { "epoch": 14.01, "learning_rate": 4.1958625291958627e-05, "loss": 0.7609, "step": 1630 }, { "epoch": 14.01, "learning_rate": 4.187520854187521e-05, "loss": 1.0871, "step": 1640 }, { "epoch": 14.01, "learning_rate": 4.1791791791791793e-05, "loss": 0.9353, "step": 1650 }, { "epoch": 14.01, "learning_rate": 4.170837504170838e-05, "loss": 0.7793, "step": 1660 }, { "epoch": 14.02, "learning_rate": 4.162495829162496e-05, "loss": 1.0073, "step": 1670 }, { "epoch": 14.02, "learning_rate": 4.1541541541541544e-05, "loss": 0.9043, "step": 1680 }, { "epoch": 14.02, "eval_accuracy": 0.6359447004608295, "eval_loss": 0.9146963953971863, "eval_runtime": 259.9831, "eval_samples_per_second": 0.835, "eval_steps_per_second": 0.212, "step": 1680 }, { "epoch": 15.0, "learning_rate": 4.145812479145813e-05, "loss": 1.0818, "step": 1690 }, { "epoch": 15.0, "learning_rate": 4.137470804137471e-05, "loss": 0.9315, "step": 1700 }, { "epoch": 15.0, "learning_rate": 4.1291291291291294e-05, "loss": 0.8777, "step": 1710 }, { "epoch": 15.01, "learning_rate": 4.120787454120788e-05, "loss": 1.0695, "step": 1720 }, { "epoch": 15.01, "learning_rate": 4.112445779112446e-05, "loss": 0.6343, "step": 1730 }, { "epoch": 15.01, "learning_rate": 4.1041041041041045e-05, "loss": 1.0106, "step": 1740 }, { "epoch": 15.01, "learning_rate": 4.095762429095763e-05, "loss": 0.9499, "step": 1750 }, { "epoch": 15.01, "learning_rate": 4.087420754087421e-05, "loss": 0.9108, "step": 1760 }, { "epoch": 15.01, "learning_rate": 4.0790790790790795e-05, "loss": 0.8515, "step": 1770 }, { "epoch": 15.02, "learning_rate": 4.070737404070737e-05, "loss": 0.749, "step": 1780 }, { "epoch": 15.02, "learning_rate": 4.062395729062396e-05, "loss": 0.8383, "step": 1790 }, { "epoch": 15.02, "eval_accuracy": 0.6359447004608295, "eval_loss": 0.945084810256958, "eval_runtime": 259.6462, "eval_samples_per_second": 0.836, "eval_steps_per_second": 0.212, "step": 1792 }, { "epoch": 16.0, "learning_rate": 4.0540540540540545e-05, "loss": 0.8241, "step": 1800 }, { "epoch": 16.0, "learning_rate": 4.045712379045712e-05, "loss": 0.7361, "step": 1810 }, { "epoch": 16.0, "learning_rate": 4.037370704037371e-05, "loss": 0.978, "step": 1820 }, { "epoch": 16.01, "learning_rate": 4.0290290290290296e-05, "loss": 0.7197, "step": 1830 }, { "epoch": 16.01, "learning_rate": 4.020687354020687e-05, "loss": 1.0322, "step": 1840 }, { "epoch": 16.01, "learning_rate": 4.012345679012346e-05, "loss": 0.8089, "step": 1850 }, { "epoch": 16.01, "learning_rate": 4.0040040040040046e-05, "loss": 0.8855, "step": 1860 }, { "epoch": 16.01, "learning_rate": 3.995662328995662e-05, "loss": 0.9764, "step": 1870 }, { "epoch": 16.01, "learning_rate": 3.9873206539873206e-05, "loss": 0.7759, "step": 1880 }, { "epoch": 16.01, "learning_rate": 3.9789789789789796e-05, "loss": 0.5536, "step": 1890 }, { "epoch": 16.02, "learning_rate": 3.970637303970637e-05, "loss": 0.7482, "step": 1900 }, { "epoch": 16.02, "eval_accuracy": 0.6221198156682027, "eval_loss": 0.876456618309021, "eval_runtime": 273.3147, "eval_samples_per_second": 0.794, "eval_steps_per_second": 0.201, "step": 1904 }, { "epoch": 17.0, "learning_rate": 3.9622956289622956e-05, "loss": 0.6662, "step": 1910 }, { "epoch": 17.0, "learning_rate": 3.953953953953955e-05, "loss": 0.857, "step": 1920 }, { "epoch": 17.0, "learning_rate": 3.945612278945612e-05, "loss": 0.8507, "step": 1930 }, { "epoch": 17.01, "learning_rate": 3.937270603937271e-05, "loss": 1.1261, "step": 1940 }, { "epoch": 17.01, "learning_rate": 3.92892892892893e-05, "loss": 0.9851, "step": 1950 }, { "epoch": 17.01, "learning_rate": 3.9205872539205874e-05, "loss": 0.5859, "step": 1960 }, { "epoch": 17.01, "learning_rate": 3.912245578912246e-05, "loss": 0.9539, "step": 1970 }, { "epoch": 17.01, "learning_rate": 3.903903903903904e-05, "loss": 0.8663, "step": 1980 }, { "epoch": 17.01, "learning_rate": 3.8955622288955624e-05, "loss": 0.9946, "step": 1990 }, { "epoch": 17.01, "learning_rate": 3.887220553887221e-05, "loss": 0.835, "step": 2000 }, { "epoch": 17.02, "learning_rate": 3.878878878878879e-05, "loss": 0.9547, "step": 2010 }, { "epoch": 17.02, "eval_accuracy": 0.7649769585253456, "eval_loss": 0.7997832894325256, "eval_runtime": 275.0933, "eval_samples_per_second": 0.789, "eval_steps_per_second": 0.2, "step": 2016 }, { "epoch": 18.0, "learning_rate": 3.8705372038705374e-05, "loss": 0.8025, "step": 2020 }, { "epoch": 18.0, "learning_rate": 3.862195528862196e-05, "loss": 0.9637, "step": 2030 }, { "epoch": 18.0, "learning_rate": 3.8538538538538534e-05, "loss": 1.1231, "step": 2040 }, { "epoch": 18.01, "learning_rate": 3.8455121788455125e-05, "loss": 0.6693, "step": 2050 }, { "epoch": 18.01, "learning_rate": 3.837170503837171e-05, "loss": 1.0535, "step": 2060 }, { "epoch": 18.01, "learning_rate": 3.8288288288288285e-05, "loss": 0.9, "step": 2070 }, { "epoch": 18.01, "learning_rate": 3.8204871538204875e-05, "loss": 0.7855, "step": 2080 }, { "epoch": 18.01, "learning_rate": 3.812145478812146e-05, "loss": 0.7349, "step": 2090 }, { "epoch": 18.01, "learning_rate": 3.8038038038038035e-05, "loss": 0.9599, "step": 2100 }, { "epoch": 18.01, "learning_rate": 3.7954621287954625e-05, "loss": 0.6589, "step": 2110 }, { "epoch": 18.02, "learning_rate": 3.787120453787121e-05, "loss": 0.7028, "step": 2120 }, { "epoch": 18.02, "eval_accuracy": 0.6405529953917051, "eval_loss": 0.9256826639175415, "eval_runtime": 263.4393, "eval_samples_per_second": 0.824, "eval_steps_per_second": 0.209, "step": 2128 }, { "epoch": 19.0, "learning_rate": 3.7787787787787786e-05, "loss": 0.5654, "step": 2130 }, { "epoch": 19.0, "learning_rate": 3.7704371037704376e-05, "loss": 0.8278, "step": 2140 }, { "epoch": 19.0, "learning_rate": 3.762095428762096e-05, "loss": 1.0629, "step": 2150 }, { "epoch": 19.0, "learning_rate": 3.7537537537537536e-05, "loss": 1.026, "step": 2160 }, { "epoch": 19.01, "learning_rate": 3.745412078745412e-05, "loss": 0.8544, "step": 2170 }, { "epoch": 19.01, "learning_rate": 3.737070403737071e-05, "loss": 0.8669, "step": 2180 }, { "epoch": 19.01, "learning_rate": 3.7287287287287286e-05, "loss": 0.6044, "step": 2190 }, { "epoch": 19.01, "learning_rate": 3.720387053720387e-05, "loss": 0.8129, "step": 2200 }, { "epoch": 19.01, "learning_rate": 3.712045378712046e-05, "loss": 0.5283, "step": 2210 }, { "epoch": 19.01, "learning_rate": 3.7037037037037037e-05, "loss": 1.0115, "step": 2220 }, { "epoch": 19.02, "learning_rate": 3.695362028695362e-05, "loss": 0.7743, "step": 2230 }, { "epoch": 19.02, "learning_rate": 3.687020353687021e-05, "loss": 0.8659, "step": 2240 }, { "epoch": 19.02, "eval_accuracy": 0.5852534562211982, "eval_loss": 1.0655064582824707, "eval_runtime": 271.2554, "eval_samples_per_second": 0.8, "eval_steps_per_second": 0.203, "step": 2240 }, { "epoch": 20.0, "learning_rate": 3.678678678678679e-05, "loss": 0.8819, "step": 2250 }, { "epoch": 20.0, "learning_rate": 3.670337003670337e-05, "loss": 0.7077, "step": 2260 }, { "epoch": 20.0, "learning_rate": 3.6619953286619954e-05, "loss": 0.737, "step": 2270 }, { "epoch": 20.01, "learning_rate": 3.653653653653654e-05, "loss": 0.8414, "step": 2280 }, { "epoch": 20.01, "learning_rate": 3.645311978645312e-05, "loss": 0.8566, "step": 2290 }, { "epoch": 20.01, "learning_rate": 3.6369703036369704e-05, "loss": 0.8155, "step": 2300 }, { "epoch": 20.01, "learning_rate": 3.628628628628629e-05, "loss": 0.6651, "step": 2310 }, { "epoch": 20.01, "learning_rate": 3.620286953620287e-05, "loss": 0.7734, "step": 2320 }, { "epoch": 20.01, "learning_rate": 3.6119452786119454e-05, "loss": 0.9056, "step": 2330 }, { "epoch": 20.02, "learning_rate": 3.603603603603604e-05, "loss": 0.9031, "step": 2340 }, { "epoch": 20.02, "learning_rate": 3.595261928595262e-05, "loss": 0.5591, "step": 2350 }, { "epoch": 20.02, "eval_accuracy": 0.576036866359447, "eval_loss": 1.2793947458267212, "eval_runtime": 265.9827, "eval_samples_per_second": 0.816, "eval_steps_per_second": 0.207, "step": 2352 }, { "epoch": 21.0, "learning_rate": 3.5869202535869205e-05, "loss": 0.6663, "step": 2360 }, { "epoch": 21.0, "learning_rate": 3.578578578578579e-05, "loss": 0.6847, "step": 2370 }, { "epoch": 21.0, "learning_rate": 3.570236903570237e-05, "loss": 0.8432, "step": 2380 }, { "epoch": 21.01, "learning_rate": 3.5618952285618955e-05, "loss": 0.6488, "step": 2390 }, { "epoch": 21.01, "learning_rate": 3.553553553553554e-05, "loss": 0.9092, "step": 2400 }, { "epoch": 21.01, "learning_rate": 3.545211878545212e-05, "loss": 0.8903, "step": 2410 }, { "epoch": 21.01, "learning_rate": 3.5368702035368706e-05, "loss": 0.4662, "step": 2420 }, { "epoch": 21.01, "learning_rate": 3.528528528528528e-05, "loss": 0.9775, "step": 2430 }, { "epoch": 21.01, "learning_rate": 3.520186853520187e-05, "loss": 0.973, "step": 2440 }, { "epoch": 21.01, "learning_rate": 3.5118451785118456e-05, "loss": 0.5741, "step": 2450 }, { "epoch": 21.02, "learning_rate": 3.503503503503503e-05, "loss": 0.8963, "step": 2460 }, { "epoch": 21.02, "eval_accuracy": 0.695852534562212, "eval_loss": 1.0049302577972412, "eval_runtime": 270.1661, "eval_samples_per_second": 0.803, "eval_steps_per_second": 0.204, "step": 2464 }, { "epoch": 22.0, "learning_rate": 3.495161828495162e-05, "loss": 0.7073, "step": 2470 }, { "epoch": 22.0, "learning_rate": 3.4868201534868206e-05, "loss": 0.5915, "step": 2480 }, { "epoch": 22.0, "learning_rate": 3.478478478478478e-05, "loss": 0.3567, "step": 2490 }, { "epoch": 22.01, "learning_rate": 3.470136803470137e-05, "loss": 0.7007, "step": 2500 }, { "epoch": 22.01, "learning_rate": 3.4617951284617957e-05, "loss": 0.4253, "step": 2510 }, { "epoch": 22.01, "learning_rate": 3.453453453453453e-05, "loss": 0.9297, "step": 2520 }, { "epoch": 22.01, "learning_rate": 3.4451117784451123e-05, "loss": 0.9365, "step": 2530 }, { "epoch": 22.01, "learning_rate": 3.436770103436771e-05, "loss": 0.4607, "step": 2540 }, { "epoch": 22.01, "learning_rate": 3.4284284284284284e-05, "loss": 0.8208, "step": 2550 }, { "epoch": 22.01, "learning_rate": 3.420086753420087e-05, "loss": 0.8313, "step": 2560 }, { "epoch": 22.02, "learning_rate": 3.411745078411746e-05, "loss": 0.9221, "step": 2570 }, { "epoch": 22.02, "eval_accuracy": 0.6082949308755761, "eval_loss": 1.111289620399475, "eval_runtime": 272.9644, "eval_samples_per_second": 0.795, "eval_steps_per_second": 0.201, "step": 2576 }, { "epoch": 23.0, "learning_rate": 3.4034034034034034e-05, "loss": 0.8386, "step": 2580 }, { "epoch": 23.0, "learning_rate": 3.395061728395062e-05, "loss": 0.549, "step": 2590 }, { "epoch": 23.0, "learning_rate": 3.386720053386721e-05, "loss": 1.0528, "step": 2600 }, { "epoch": 23.01, "learning_rate": 3.3783783783783784e-05, "loss": 0.6398, "step": 2610 }, { "epoch": 23.01, "learning_rate": 3.370036703370037e-05, "loss": 0.7276, "step": 2620 }, { "epoch": 23.01, "learning_rate": 3.361695028361695e-05, "loss": 0.8483, "step": 2630 }, { "epoch": 23.01, "learning_rate": 3.3533533533533535e-05, "loss": 0.5737, "step": 2640 }, { "epoch": 23.01, "learning_rate": 3.345011678345012e-05, "loss": 0.588, "step": 2650 }, { "epoch": 23.01, "learning_rate": 3.33667000333667e-05, "loss": 0.6416, "step": 2660 }, { "epoch": 23.01, "learning_rate": 3.3283283283283285e-05, "loss": 0.7938, "step": 2670 }, { "epoch": 23.02, "learning_rate": 3.319986653319987e-05, "loss": 0.7154, "step": 2680 }, { "epoch": 23.02, "eval_accuracy": 0.6405529953917051, "eval_loss": 0.9371297359466553, "eval_runtime": 283.5798, "eval_samples_per_second": 0.765, "eval_steps_per_second": 0.194, "step": 2688 }, { "epoch": 24.0, "learning_rate": 3.311644978311645e-05, "loss": 0.4492, "step": 2690 }, { "epoch": 24.0, "learning_rate": 3.3033033033033035e-05, "loss": 0.6304, "step": 2700 }, { "epoch": 24.0, "learning_rate": 3.294961628294962e-05, "loss": 0.7286, "step": 2710 }, { "epoch": 24.0, "learning_rate": 3.2866199532866195e-05, "loss": 0.9193, "step": 2720 }, { "epoch": 24.01, "learning_rate": 3.2782782782782786e-05, "loss": 0.7511, "step": 2730 }, { "epoch": 24.01, "learning_rate": 3.269936603269937e-05, "loss": 0.7076, "step": 2740 }, { "epoch": 24.01, "learning_rate": 3.2615949282615946e-05, "loss": 0.7735, "step": 2750 }, { "epoch": 24.01, "learning_rate": 3.2532532532532536e-05, "loss": 0.9404, "step": 2760 }, { "epoch": 24.01, "learning_rate": 3.244911578244912e-05, "loss": 0.9553, "step": 2770 }, { "epoch": 24.01, "learning_rate": 3.2365699032365696e-05, "loss": 1.0067, "step": 2780 }, { "epoch": 24.02, "learning_rate": 3.2282282282282286e-05, "loss": 0.7712, "step": 2790 }, { "epoch": 24.02, "learning_rate": 3.219886553219887e-05, "loss": 0.8795, "step": 2800 }, { "epoch": 24.02, "eval_accuracy": 0.7235023041474654, "eval_loss": 0.6837921738624573, "eval_runtime": 264.0243, "eval_samples_per_second": 0.822, "eval_steps_per_second": 0.208, "step": 2800 }, { "epoch": 25.0, "learning_rate": 3.2115448782115447e-05, "loss": 0.6295, "step": 2810 }, { "epoch": 25.0, "learning_rate": 3.203203203203203e-05, "loss": 0.5106, "step": 2820 }, { "epoch": 25.0, "learning_rate": 3.194861528194862e-05, "loss": 0.6515, "step": 2830 }, { "epoch": 25.01, "learning_rate": 3.18651985318652e-05, "loss": 0.5182, "step": 2840 }, { "epoch": 25.01, "learning_rate": 3.178178178178178e-05, "loss": 0.9393, "step": 2850 }, { "epoch": 25.01, "learning_rate": 3.169836503169837e-05, "loss": 0.6204, "step": 2860 }, { "epoch": 25.01, "learning_rate": 3.161494828161495e-05, "loss": 0.5407, "step": 2870 }, { "epoch": 25.01, "learning_rate": 3.153153153153153e-05, "loss": 0.8556, "step": 2880 }, { "epoch": 25.01, "learning_rate": 3.144811478144812e-05, "loss": 0.6423, "step": 2890 }, { "epoch": 25.02, "learning_rate": 3.13646980313647e-05, "loss": 0.8295, "step": 2900 }, { "epoch": 25.02, "learning_rate": 3.128128128128128e-05, "loss": 0.631, "step": 2910 }, { "epoch": 25.02, "eval_accuracy": 0.6129032258064516, "eval_loss": 1.209276556968689, "eval_runtime": 285.1576, "eval_samples_per_second": 0.761, "eval_steps_per_second": 0.193, "step": 2912 }, { "epoch": 26.0, "learning_rate": 3.119786453119787e-05, "loss": 0.7184, "step": 2920 }, { "epoch": 26.0, "learning_rate": 3.111444778111445e-05, "loss": 0.6305, "step": 2930 }, { "epoch": 26.0, "learning_rate": 3.103103103103103e-05, "loss": 0.7949, "step": 2940 }, { "epoch": 26.01, "learning_rate": 3.0947614280947615e-05, "loss": 1.2476, "step": 2950 }, { "epoch": 26.01, "learning_rate": 3.08641975308642e-05, "loss": 0.7922, "step": 2960 }, { "epoch": 26.01, "learning_rate": 3.078078078078078e-05, "loss": 0.581, "step": 2970 }, { "epoch": 26.01, "learning_rate": 3.0697364030697365e-05, "loss": 0.6329, "step": 2980 }, { "epoch": 26.01, "learning_rate": 3.061394728061395e-05, "loss": 0.8019, "step": 2990 }, { "epoch": 26.01, "learning_rate": 3.053053053053053e-05, "loss": 0.7641, "step": 3000 }, { "epoch": 26.01, "learning_rate": 3.044711378044712e-05, "loss": 0.6169, "step": 3010 }, { "epoch": 26.02, "learning_rate": 3.0363697030363696e-05, "loss": 1.0489, "step": 3020 }, { "epoch": 26.02, "eval_accuracy": 0.5483870967741935, "eval_loss": 1.472022294998169, "eval_runtime": 262.7206, "eval_samples_per_second": 0.826, "eval_steps_per_second": 0.209, "step": 3024 }, { "epoch": 27.0, "learning_rate": 3.0280280280280282e-05, "loss": 0.5064, "step": 3030 }, { "epoch": 27.0, "learning_rate": 3.0196863530196866e-05, "loss": 0.493, "step": 3040 }, { "epoch": 27.0, "learning_rate": 3.0113446780113446e-05, "loss": 0.6668, "step": 3050 }, { "epoch": 27.01, "learning_rate": 3.0030030030030033e-05, "loss": 0.5836, "step": 3060 }, { "epoch": 27.01, "learning_rate": 2.9946613279946616e-05, "loss": 0.997, "step": 3070 }, { "epoch": 27.01, "learning_rate": 2.9863196529863196e-05, "loss": 0.8393, "step": 3080 }, { "epoch": 27.01, "learning_rate": 2.9779779779779783e-05, "loss": 0.609, "step": 3090 }, { "epoch": 27.01, "learning_rate": 2.9696363029696367e-05, "loss": 0.7111, "step": 3100 }, { "epoch": 27.01, "learning_rate": 2.9612946279612947e-05, "loss": 0.7491, "step": 3110 }, { "epoch": 27.01, "learning_rate": 2.952952952952953e-05, "loss": 0.69, "step": 3120 }, { "epoch": 27.02, "learning_rate": 2.9446112779446117e-05, "loss": 0.5881, "step": 3130 }, { "epoch": 27.02, "eval_accuracy": 0.631336405529954, "eval_loss": 1.1905186176300049, "eval_runtime": 281.4297, "eval_samples_per_second": 0.771, "eval_steps_per_second": 0.195, "step": 3136 }, { "epoch": 28.0, "learning_rate": 2.9362696029362697e-05, "loss": 1.0567, "step": 3140 }, { "epoch": 28.0, "learning_rate": 2.927927927927928e-05, "loss": 0.5978, "step": 3150 }, { "epoch": 28.0, "learning_rate": 2.9195862529195867e-05, "loss": 0.5434, "step": 3160 }, { "epoch": 28.01, "learning_rate": 2.9112445779112447e-05, "loss": 0.6441, "step": 3170 }, { "epoch": 28.01, "learning_rate": 2.902902902902903e-05, "loss": 0.8564, "step": 3180 }, { "epoch": 28.01, "learning_rate": 2.8945612278945618e-05, "loss": 0.7492, "step": 3190 }, { "epoch": 28.01, "learning_rate": 2.8862195528862194e-05, "loss": 0.7439, "step": 3200 }, { "epoch": 28.01, "learning_rate": 2.877877877877878e-05, "loss": 0.731, "step": 3210 }, { "epoch": 28.01, "learning_rate": 2.8695362028695365e-05, "loss": 0.8052, "step": 3220 }, { "epoch": 28.01, "learning_rate": 2.8611945278611945e-05, "loss": 0.8463, "step": 3230 }, { "epoch": 28.02, "learning_rate": 2.852852852852853e-05, "loss": 0.7919, "step": 3240 }, { "epoch": 28.02, "eval_accuracy": 0.576036866359447, "eval_loss": 1.1292119026184082, "eval_runtime": 300.8957, "eval_samples_per_second": 0.721, "eval_steps_per_second": 0.183, "step": 3248 }, { "epoch": 29.0, "learning_rate": 2.844511177844511e-05, "loss": 0.9881, "step": 3250 }, { "epoch": 29.0, "learning_rate": 2.8361695028361695e-05, "loss": 0.4171, "step": 3260 }, { "epoch": 29.0, "learning_rate": 2.8278278278278282e-05, "loss": 0.4612, "step": 3270 }, { "epoch": 29.0, "learning_rate": 2.8194861528194862e-05, "loss": 0.6761, "step": 3280 }, { "epoch": 29.01, "learning_rate": 2.8111444778111445e-05, "loss": 0.4762, "step": 3290 }, { "epoch": 29.01, "learning_rate": 2.8028028028028032e-05, "loss": 0.6633, "step": 3300 }, { "epoch": 29.01, "learning_rate": 2.794461127794461e-05, "loss": 0.5644, "step": 3310 }, { "epoch": 29.01, "learning_rate": 2.7861194527861196e-05, "loss": 0.9361, "step": 3320 }, { "epoch": 29.01, "learning_rate": 2.777777777777778e-05, "loss": 0.8429, "step": 3330 }, { "epoch": 29.01, "learning_rate": 2.769436102769436e-05, "loss": 0.6224, "step": 3340 }, { "epoch": 29.02, "learning_rate": 2.7610944277610946e-05, "loss": 0.8489, "step": 3350 }, { "epoch": 29.02, "learning_rate": 2.752752752752753e-05, "loss": 0.9158, "step": 3360 }, { "epoch": 29.02, "eval_accuracy": 0.6359447004608295, "eval_loss": 1.0214248895645142, "eval_runtime": 281.3806, "eval_samples_per_second": 0.771, "eval_steps_per_second": 0.195, "step": 3360 }, { "epoch": 30.0, "learning_rate": 2.744411077744411e-05, "loss": 0.6866, "step": 3370 }, { "epoch": 30.0, "learning_rate": 2.7360694027360696e-05, "loss": 0.8052, "step": 3380 }, { "epoch": 30.0, "learning_rate": 2.727727727727728e-05, "loss": 0.6658, "step": 3390 }, { "epoch": 30.01, "learning_rate": 2.719386052719386e-05, "loss": 0.6167, "step": 3400 }, { "epoch": 30.01, "learning_rate": 2.7110443777110443e-05, "loss": 0.5109, "step": 3410 }, { "epoch": 30.01, "learning_rate": 2.702702702702703e-05, "loss": 1.1015, "step": 3420 }, { "epoch": 30.01, "learning_rate": 2.694361027694361e-05, "loss": 0.7845, "step": 3430 }, { "epoch": 30.01, "learning_rate": 2.6860193526860194e-05, "loss": 0.6872, "step": 3440 }, { "epoch": 30.01, "learning_rate": 2.677677677677678e-05, "loss": 0.4606, "step": 3450 }, { "epoch": 30.02, "learning_rate": 2.669336002669336e-05, "loss": 0.7213, "step": 3460 }, { "epoch": 30.02, "learning_rate": 2.6609943276609944e-05, "loss": 0.8319, "step": 3470 }, { "epoch": 30.02, "eval_accuracy": 0.6682027649769585, "eval_loss": 1.2861884832382202, "eval_runtime": 276.1791, "eval_samples_per_second": 0.786, "eval_steps_per_second": 0.199, "step": 3472 }, { "epoch": 31.0, "learning_rate": 2.652652652652653e-05, "loss": 0.7171, "step": 3480 }, { "epoch": 31.0, "learning_rate": 2.6443109776443108e-05, "loss": 1.2605, "step": 3490 }, { "epoch": 31.0, "learning_rate": 2.6359693026359694e-05, "loss": 0.7113, "step": 3500 }, { "epoch": 31.01, "learning_rate": 2.6276276276276278e-05, "loss": 0.4538, "step": 3510 }, { "epoch": 31.01, "learning_rate": 2.6192859526192858e-05, "loss": 0.5572, "step": 3520 }, { "epoch": 31.01, "learning_rate": 2.6109442776109445e-05, "loss": 0.7715, "step": 3530 }, { "epoch": 31.01, "learning_rate": 2.6026026026026028e-05, "loss": 1.0429, "step": 3540 }, { "epoch": 31.01, "learning_rate": 2.5942609275942608e-05, "loss": 0.7335, "step": 3550 }, { "epoch": 31.01, "learning_rate": 2.5859192525859195e-05, "loss": 0.7139, "step": 3560 }, { "epoch": 31.01, "learning_rate": 2.577577577577578e-05, "loss": 0.422, "step": 3570 }, { "epoch": 31.02, "learning_rate": 2.569235902569236e-05, "loss": 0.6775, "step": 3580 }, { "epoch": 31.02, "eval_accuracy": 0.6405529953917051, "eval_loss": 1.0971248149871826, "eval_runtime": 295.308, "eval_samples_per_second": 0.735, "eval_steps_per_second": 0.186, "step": 3584 }, { "epoch": 32.0, "learning_rate": 2.5608942275608942e-05, "loss": 0.3834, "step": 3590 }, { "epoch": 32.0, "learning_rate": 2.552552552552553e-05, "loss": 0.5938, "step": 3600 }, { "epoch": 32.0, "learning_rate": 2.544210877544211e-05, "loss": 0.6666, "step": 3610 }, { "epoch": 32.01, "learning_rate": 2.5358692025358692e-05, "loss": 0.7156, "step": 3620 }, { "epoch": 32.01, "learning_rate": 2.527527527527528e-05, "loss": 0.816, "step": 3630 }, { "epoch": 32.01, "learning_rate": 2.519185852519186e-05, "loss": 0.7641, "step": 3640 }, { "epoch": 32.01, "learning_rate": 2.5108441775108443e-05, "loss": 0.7681, "step": 3650 }, { "epoch": 32.01, "learning_rate": 2.502502502502503e-05, "loss": 0.9274, "step": 3660 }, { "epoch": 32.01, "learning_rate": 2.494160827494161e-05, "loss": 0.8763, "step": 3670 }, { "epoch": 32.01, "learning_rate": 2.4858191524858193e-05, "loss": 0.5928, "step": 3680 }, { "epoch": 32.02, "learning_rate": 2.4774774774774777e-05, "loss": 0.7191, "step": 3690 }, { "epoch": 32.02, "eval_accuracy": 0.6497695852534562, "eval_loss": 1.026401400566101, "eval_runtime": 280.0325, "eval_samples_per_second": 0.775, "eval_steps_per_second": 0.196, "step": 3696 }, { "epoch": 33.0, "learning_rate": 2.4691358024691357e-05, "loss": 0.9832, "step": 3700 }, { "epoch": 33.0, "learning_rate": 2.4607941274607943e-05, "loss": 0.7523, "step": 3710 }, { "epoch": 33.0, "learning_rate": 2.4524524524524527e-05, "loss": 0.6381, "step": 3720 }, { "epoch": 33.01, "learning_rate": 2.4441107774441107e-05, "loss": 0.7641, "step": 3730 }, { "epoch": 33.01, "learning_rate": 2.4357691024357694e-05, "loss": 0.5699, "step": 3740 }, { "epoch": 33.01, "learning_rate": 2.4274274274274274e-05, "loss": 0.5342, "step": 3750 }, { "epoch": 33.01, "learning_rate": 2.4190857524190857e-05, "loss": 0.4933, "step": 3760 }, { "epoch": 33.01, "learning_rate": 2.4107440774107444e-05, "loss": 0.7027, "step": 3770 }, { "epoch": 33.01, "learning_rate": 2.4024024024024024e-05, "loss": 0.5812, "step": 3780 }, { "epoch": 33.01, "learning_rate": 2.3940607273940608e-05, "loss": 0.5228, "step": 3790 }, { "epoch": 33.02, "learning_rate": 2.385719052385719e-05, "loss": 0.7662, "step": 3800 }, { "epoch": 33.02, "eval_accuracy": 0.6405529953917051, "eval_loss": 1.0589454174041748, "eval_runtime": 290.7017, "eval_samples_per_second": 0.746, "eval_steps_per_second": 0.189, "step": 3808 }, { "epoch": 34.0, "learning_rate": 2.3773773773773775e-05, "loss": 0.5944, "step": 3810 }, { "epoch": 34.0, "learning_rate": 2.3690357023690358e-05, "loss": 0.588, "step": 3820 }, { "epoch": 34.0, "learning_rate": 2.360694027360694e-05, "loss": 0.6972, "step": 3830 }, { "epoch": 34.0, "learning_rate": 2.3523523523523525e-05, "loss": 0.544, "step": 3840 }, { "epoch": 34.01, "learning_rate": 2.344010677344011e-05, "loss": 0.6615, "step": 3850 }, { "epoch": 34.01, "learning_rate": 2.3356690023356692e-05, "loss": 0.5002, "step": 3860 }, { "epoch": 34.01, "learning_rate": 2.3273273273273275e-05, "loss": 0.5654, "step": 3870 }, { "epoch": 34.01, "learning_rate": 2.3189856523189855e-05, "loss": 0.6728, "step": 3880 }, { "epoch": 34.01, "learning_rate": 2.3106439773106442e-05, "loss": 0.5296, "step": 3890 }, { "epoch": 34.01, "learning_rate": 2.3023023023023026e-05, "loss": 0.4234, "step": 3900 }, { "epoch": 34.02, "learning_rate": 2.2939606272939606e-05, "loss": 0.5872, "step": 3910 }, { "epoch": 34.02, "learning_rate": 2.2856189522856192e-05, "loss": 0.7313, "step": 3920 }, { "epoch": 34.02, "eval_accuracy": 0.5622119815668203, "eval_loss": 1.5075870752334595, "eval_runtime": 286.0606, "eval_samples_per_second": 0.759, "eval_steps_per_second": 0.192, "step": 3920 }, { "epoch": 35.0, "learning_rate": 2.2772772772772773e-05, "loss": 0.5481, "step": 3930 }, { "epoch": 35.0, "learning_rate": 2.2689356022689356e-05, "loss": 0.7286, "step": 3940 }, { "epoch": 35.0, "learning_rate": 2.2605939272605943e-05, "loss": 0.795, "step": 3950 }, { "epoch": 35.01, "learning_rate": 2.2522522522522523e-05, "loss": 0.7738, "step": 3960 }, { "epoch": 35.01, "learning_rate": 2.2439105772439106e-05, "loss": 0.8864, "step": 3970 }, { "epoch": 35.01, "learning_rate": 2.235568902235569e-05, "loss": 0.3864, "step": 3980 }, { "epoch": 35.01, "learning_rate": 2.2272272272272273e-05, "loss": 0.6556, "step": 3990 }, { "epoch": 35.01, "learning_rate": 2.2188855522188857e-05, "loss": 0.9211, "step": 4000 }, { "epoch": 35.01, "learning_rate": 2.210543877210544e-05, "loss": 0.6319, "step": 4010 }, { "epoch": 35.02, "learning_rate": 2.2022022022022024e-05, "loss": 0.6768, "step": 4020 }, { "epoch": 35.02, "learning_rate": 2.1938605271938607e-05, "loss": 0.7539, "step": 4030 }, { "epoch": 35.02, "eval_accuracy": 0.5898617511520737, "eval_loss": 1.2265080213546753, "eval_runtime": 266.6298, "eval_samples_per_second": 0.814, "eval_steps_per_second": 0.206, "step": 4032 }, { "epoch": 36.0, "learning_rate": 2.1855188521855187e-05, "loss": 0.6197, "step": 4040 }, { "epoch": 36.0, "learning_rate": 2.1771771771771774e-05, "loss": 0.5972, "step": 4050 }, { "epoch": 36.0, "learning_rate": 2.1688355021688357e-05, "loss": 0.9935, "step": 4060 }, { "epoch": 36.01, "learning_rate": 2.1604938271604937e-05, "loss": 0.6247, "step": 4070 }, { "epoch": 36.01, "learning_rate": 2.1521521521521524e-05, "loss": 0.6728, "step": 4080 }, { "epoch": 36.01, "learning_rate": 2.1438104771438104e-05, "loss": 0.8943, "step": 4090 }, { "epoch": 36.01, "learning_rate": 2.1354688021354688e-05, "loss": 0.6704, "step": 4100 }, { "epoch": 36.01, "learning_rate": 2.1271271271271275e-05, "loss": 0.8664, "step": 4110 }, { "epoch": 36.01, "learning_rate": 2.1187854521187855e-05, "loss": 0.8486, "step": 4120 }, { "epoch": 36.01, "learning_rate": 2.1104437771104438e-05, "loss": 0.5425, "step": 4130 }, { "epoch": 36.02, "learning_rate": 2.102102102102102e-05, "loss": 0.571, "step": 4140 }, { "epoch": 36.02, "eval_accuracy": 0.6267281105990783, "eval_loss": 1.1598420143127441, "eval_runtime": 288.0962, "eval_samples_per_second": 0.753, "eval_steps_per_second": 0.191, "step": 4144 }, { "epoch": 37.0, "learning_rate": 2.0937604270937605e-05, "loss": 0.2915, "step": 4150 }, { "epoch": 37.0, "learning_rate": 2.085418752085419e-05, "loss": 0.5187, "step": 4160 }, { "epoch": 37.0, "learning_rate": 2.0770770770770772e-05, "loss": 0.4894, "step": 4170 }, { "epoch": 37.01, "learning_rate": 2.0687354020687355e-05, "loss": 0.522, "step": 4180 }, { "epoch": 37.01, "learning_rate": 2.060393727060394e-05, "loss": 0.3428, "step": 4190 }, { "epoch": 37.01, "learning_rate": 2.0520520520520522e-05, "loss": 0.4749, "step": 4200 }, { "epoch": 37.01, "learning_rate": 2.0437103770437106e-05, "loss": 0.9899, "step": 4210 }, { "epoch": 37.01, "learning_rate": 2.0353687020353686e-05, "loss": 0.7875, "step": 4220 }, { "epoch": 37.01, "learning_rate": 2.0270270270270273e-05, "loss": 0.4509, "step": 4230 }, { "epoch": 37.01, "learning_rate": 2.0186853520186856e-05, "loss": 0.5523, "step": 4240 }, { "epoch": 37.02, "learning_rate": 2.0103436770103436e-05, "loss": 0.3404, "step": 4250 }, { "epoch": 37.02, "eval_accuracy": 0.6359447004608295, "eval_loss": 1.0306791067123413, "eval_runtime": 260.5195, "eval_samples_per_second": 0.833, "eval_steps_per_second": 0.211, "step": 4256 }, { "epoch": 38.0, "learning_rate": 2.0020020020020023e-05, "loss": 0.6409, "step": 4260 }, { "epoch": 38.0, "learning_rate": 1.9936603269936603e-05, "loss": 0.9606, "step": 4270 }, { "epoch": 38.0, "learning_rate": 1.9853186519853186e-05, "loss": 0.5896, "step": 4280 }, { "epoch": 38.01, "learning_rate": 1.9769769769769773e-05, "loss": 1.0073, "step": 4290 }, { "epoch": 38.01, "learning_rate": 1.9686353019686353e-05, "loss": 0.7469, "step": 4300 }, { "epoch": 38.01, "learning_rate": 1.9602936269602937e-05, "loss": 0.9638, "step": 4310 }, { "epoch": 38.01, "learning_rate": 1.951951951951952e-05, "loss": 0.2893, "step": 4320 }, { "epoch": 38.01, "learning_rate": 1.9436102769436104e-05, "loss": 0.9141, "step": 4330 }, { "epoch": 38.01, "learning_rate": 1.9352686019352687e-05, "loss": 0.4846, "step": 4340 }, { "epoch": 38.01, "learning_rate": 1.9269269269269267e-05, "loss": 0.2622, "step": 4350 }, { "epoch": 38.02, "learning_rate": 1.9185852519185854e-05, "loss": 0.5553, "step": 4360 }, { "epoch": 38.02, "eval_accuracy": 0.7235023041474654, "eval_loss": 0.8180494904518127, "eval_runtime": 302.1979, "eval_samples_per_second": 0.718, "eval_steps_per_second": 0.182, "step": 4368 }, { "epoch": 39.0, "learning_rate": 1.9102435769102438e-05, "loss": 0.7832, "step": 4370 }, { "epoch": 39.0, "learning_rate": 1.9019019019019018e-05, "loss": 0.7238, "step": 4380 }, { "epoch": 39.0, "learning_rate": 1.8935602268935604e-05, "loss": 0.2628, "step": 4390 }, { "epoch": 39.0, "learning_rate": 1.8852185518852188e-05, "loss": 0.7441, "step": 4400 }, { "epoch": 39.01, "learning_rate": 1.8768768768768768e-05, "loss": 0.4422, "step": 4410 }, { "epoch": 39.01, "learning_rate": 1.8685352018685355e-05, "loss": 0.5266, "step": 4420 }, { "epoch": 39.01, "learning_rate": 1.8601935268601935e-05, "loss": 0.5469, "step": 4430 }, { "epoch": 39.01, "learning_rate": 1.8518518518518518e-05, "loss": 0.9752, "step": 4440 }, { "epoch": 39.01, "learning_rate": 1.8435101768435105e-05, "loss": 1.0334, "step": 4450 }, { "epoch": 39.01, "learning_rate": 1.8351685018351685e-05, "loss": 0.604, "step": 4460 }, { "epoch": 39.02, "learning_rate": 1.826826826826827e-05, "loss": 0.7227, "step": 4470 }, { "epoch": 39.02, "learning_rate": 1.8184851518184852e-05, "loss": 0.8499, "step": 4480 }, { "epoch": 39.02, "eval_accuracy": 0.6497695852534562, "eval_loss": 1.0074414014816284, "eval_runtime": 296.0859, "eval_samples_per_second": 0.733, "eval_steps_per_second": 0.186, "step": 4480 }, { "epoch": 40.0, "learning_rate": 1.8101434768101436e-05, "loss": 0.3984, "step": 4490 }, { "epoch": 40.0, "learning_rate": 1.801801801801802e-05, "loss": 0.5711, "step": 4500 }, { "epoch": 40.0, "learning_rate": 1.7934601267934602e-05, "loss": 0.5289, "step": 4510 }, { "epoch": 40.01, "learning_rate": 1.7851184517851186e-05, "loss": 0.7913, "step": 4520 }, { "epoch": 40.01, "learning_rate": 1.776776776776777e-05, "loss": 0.6343, "step": 4530 }, { "epoch": 40.01, "learning_rate": 1.7684351017684353e-05, "loss": 0.5266, "step": 4540 }, { "epoch": 40.01, "learning_rate": 1.7600934267600936e-05, "loss": 0.5792, "step": 4550 }, { "epoch": 40.01, "learning_rate": 1.7517517517517516e-05, "loss": 0.8629, "step": 4560 }, { "epoch": 40.01, "learning_rate": 1.7434100767434103e-05, "loss": 0.7761, "step": 4570 }, { "epoch": 40.02, "learning_rate": 1.7350684017350687e-05, "loss": 0.7784, "step": 4580 }, { "epoch": 40.02, "learning_rate": 1.7267267267267267e-05, "loss": 0.5036, "step": 4590 }, { "epoch": 40.02, "eval_accuracy": 0.631336405529954, "eval_loss": 1.1159977912902832, "eval_runtime": 266.794, "eval_samples_per_second": 0.813, "eval_steps_per_second": 0.206, "step": 4592 }, { "epoch": 41.0, "learning_rate": 1.7183850517183853e-05, "loss": 0.8591, "step": 4600 }, { "epoch": 41.0, "learning_rate": 1.7100433767100434e-05, "loss": 0.4294, "step": 4610 }, { "epoch": 41.0, "learning_rate": 1.7017017017017017e-05, "loss": 0.5119, "step": 4620 }, { "epoch": 41.01, "learning_rate": 1.6933600266933604e-05, "loss": 0.3545, "step": 4630 }, { "epoch": 41.01, "learning_rate": 1.6850183516850184e-05, "loss": 0.7615, "step": 4640 }, { "epoch": 41.01, "learning_rate": 1.6766766766766767e-05, "loss": 0.1783, "step": 4650 }, { "epoch": 41.01, "learning_rate": 1.668335001668335e-05, "loss": 0.6125, "step": 4660 }, { "epoch": 41.01, "learning_rate": 1.6599933266599934e-05, "loss": 1.0295, "step": 4670 }, { "epoch": 41.01, "learning_rate": 1.6516516516516518e-05, "loss": 0.6956, "step": 4680 }, { "epoch": 41.01, "learning_rate": 1.6433099766433098e-05, "loss": 0.6383, "step": 4690 }, { "epoch": 41.02, "learning_rate": 1.6349683016349685e-05, "loss": 0.814, "step": 4700 }, { "epoch": 41.02, "eval_accuracy": 0.695852534562212, "eval_loss": 0.903153657913208, "eval_runtime": 298.2312, "eval_samples_per_second": 0.728, "eval_steps_per_second": 0.184, "step": 4704 }, { "epoch": 42.0, "learning_rate": 1.6266266266266268e-05, "loss": 0.5097, "step": 4710 }, { "epoch": 42.0, "learning_rate": 1.6182849516182848e-05, "loss": 0.4471, "step": 4720 }, { "epoch": 42.0, "learning_rate": 1.6099432766099435e-05, "loss": 0.4364, "step": 4730 }, { "epoch": 42.01, "learning_rate": 1.6016016016016015e-05, "loss": 0.5641, "step": 4740 }, { "epoch": 42.01, "learning_rate": 1.59325992659326e-05, "loss": 0.8213, "step": 4750 }, { "epoch": 42.01, "learning_rate": 1.5849182515849185e-05, "loss": 0.7006, "step": 4760 }, { "epoch": 42.01, "learning_rate": 1.5765765765765765e-05, "loss": 0.5187, "step": 4770 }, { "epoch": 42.01, "learning_rate": 1.568234901568235e-05, "loss": 0.3012, "step": 4780 }, { "epoch": 42.01, "learning_rate": 1.5598932265598936e-05, "loss": 0.7188, "step": 4790 }, { "epoch": 42.01, "learning_rate": 1.5515515515515516e-05, "loss": 0.5198, "step": 4800 }, { "epoch": 42.02, "learning_rate": 1.54320987654321e-05, "loss": 0.7293, "step": 4810 }, { "epoch": 42.02, "eval_accuracy": 0.728110599078341, "eval_loss": 0.9331218004226685, "eval_runtime": 258.6829, "eval_samples_per_second": 0.839, "eval_steps_per_second": 0.213, "step": 4816 }, { "epoch": 43.0, "learning_rate": 1.5348682015348683e-05, "loss": 0.4944, "step": 4820 }, { "epoch": 43.0, "learning_rate": 1.5265265265265266e-05, "loss": 0.4447, "step": 4830 }, { "epoch": 43.0, "learning_rate": 1.5181848515181848e-05, "loss": 0.3132, "step": 4840 }, { "epoch": 43.01, "learning_rate": 1.5098431765098433e-05, "loss": 0.5882, "step": 4850 }, { "epoch": 43.01, "learning_rate": 1.5015015015015016e-05, "loss": 0.7151, "step": 4860 }, { "epoch": 43.01, "learning_rate": 1.4931598264931598e-05, "loss": 0.7703, "step": 4870 }, { "epoch": 43.01, "learning_rate": 1.4848181514848183e-05, "loss": 0.8505, "step": 4880 }, { "epoch": 43.01, "learning_rate": 1.4764764764764765e-05, "loss": 0.3957, "step": 4890 }, { "epoch": 43.01, "learning_rate": 1.4681348014681348e-05, "loss": 0.5204, "step": 4900 }, { "epoch": 43.01, "learning_rate": 1.4597931264597934e-05, "loss": 0.4383, "step": 4910 }, { "epoch": 43.02, "learning_rate": 1.4514514514514515e-05, "loss": 0.4402, "step": 4920 }, { "epoch": 43.02, "eval_accuracy": 0.5668202764976958, "eval_loss": 1.4190495014190674, "eval_runtime": 275.5299, "eval_samples_per_second": 0.788, "eval_steps_per_second": 0.2, "step": 4928 }, { "epoch": 44.0, "learning_rate": 1.4431097764431097e-05, "loss": 0.2849, "step": 4930 }, { "epoch": 44.0, "learning_rate": 1.4347681014347682e-05, "loss": 0.5245, "step": 4940 }, { "epoch": 44.0, "learning_rate": 1.4264264264264266e-05, "loss": 0.4884, "step": 4950 }, { "epoch": 44.0, "learning_rate": 1.4180847514180847e-05, "loss": 0.3824, "step": 4960 }, { "epoch": 44.01, "learning_rate": 1.4097430764097431e-05, "loss": 0.5175, "step": 4970 }, { "epoch": 44.01, "learning_rate": 1.4014014014014016e-05, "loss": 0.5526, "step": 4980 }, { "epoch": 44.01, "learning_rate": 1.3930597263930598e-05, "loss": 0.4134, "step": 4990 }, { "epoch": 44.01, "learning_rate": 1.384718051384718e-05, "loss": 0.4121, "step": 5000 }, { "epoch": 44.01, "learning_rate": 1.3763763763763765e-05, "loss": 0.7849, "step": 5010 }, { "epoch": 44.01, "learning_rate": 1.3680347013680348e-05, "loss": 0.7587, "step": 5020 }, { "epoch": 44.02, "learning_rate": 1.359693026359693e-05, "loss": 0.4583, "step": 5030 }, { "epoch": 44.02, "learning_rate": 1.3513513513513515e-05, "loss": 0.4625, "step": 5040 }, { "epoch": 44.02, "eval_accuracy": 0.7004608294930875, "eval_loss": 1.0268219709396362, "eval_runtime": 299.0881, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.184, "step": 5040 }, { "epoch": 45.0, "learning_rate": 1.3430096763430097e-05, "loss": 0.4425, "step": 5050 }, { "epoch": 45.0, "learning_rate": 1.334668001334668e-05, "loss": 0.6364, "step": 5060 }, { "epoch": 45.0, "learning_rate": 1.3263263263263265e-05, "loss": 0.386, "step": 5070 }, { "epoch": 45.01, "learning_rate": 1.3179846513179847e-05, "loss": 0.6462, "step": 5080 }, { "epoch": 45.01, "learning_rate": 1.3096429763096429e-05, "loss": 0.8925, "step": 5090 }, { "epoch": 45.01, "learning_rate": 1.3013013013013014e-05, "loss": 0.6358, "step": 5100 }, { "epoch": 45.01, "learning_rate": 1.2929596262929598e-05, "loss": 0.8605, "step": 5110 }, { "epoch": 45.01, "learning_rate": 1.284617951284618e-05, "loss": 0.9282, "step": 5120 }, { "epoch": 45.01, "learning_rate": 1.2762762762762764e-05, "loss": 0.7836, "step": 5130 }, { "epoch": 45.02, "learning_rate": 1.2679346012679346e-05, "loss": 0.4875, "step": 5140 }, { "epoch": 45.02, "learning_rate": 1.259592926259593e-05, "loss": 0.2266, "step": 5150 }, { "epoch": 45.02, "eval_accuracy": 0.6405529953917051, "eval_loss": 1.2808195352554321, "eval_runtime": 257.9463, "eval_samples_per_second": 0.841, "eval_steps_per_second": 0.213, "step": 5152 }, { "epoch": 46.0, "learning_rate": 1.2512512512512515e-05, "loss": 0.6837, "step": 5160 }, { "epoch": 46.0, "learning_rate": 1.2429095762429097e-05, "loss": 0.3609, "step": 5170 }, { "epoch": 46.0, "learning_rate": 1.2345679012345678e-05, "loss": 0.7921, "step": 5180 }, { "epoch": 46.01, "learning_rate": 1.2262262262262263e-05, "loss": 1.2116, "step": 5190 }, { "epoch": 46.01, "learning_rate": 1.2178845512178847e-05, "loss": 0.6612, "step": 5200 }, { "epoch": 46.01, "learning_rate": 1.2095428762095429e-05, "loss": 0.2317, "step": 5210 }, { "epoch": 46.01, "learning_rate": 1.2012012012012012e-05, "loss": 0.4213, "step": 5220 }, { "epoch": 46.01, "learning_rate": 1.1928595261928596e-05, "loss": 0.8834, "step": 5230 }, { "epoch": 46.01, "learning_rate": 1.1845178511845179e-05, "loss": 0.8265, "step": 5240 }, { "epoch": 46.01, "learning_rate": 1.1761761761761762e-05, "loss": 0.1315, "step": 5250 }, { "epoch": 46.02, "learning_rate": 1.1678345011678346e-05, "loss": 0.7424, "step": 5260 }, { "epoch": 46.02, "eval_accuracy": 0.6497695852534562, "eval_loss": 1.1821485757827759, "eval_runtime": 277.4384, "eval_samples_per_second": 0.782, "eval_steps_per_second": 0.198, "step": 5264 }, { "epoch": 47.0, "learning_rate": 1.1594928261594928e-05, "loss": 0.7674, "step": 5270 }, { "epoch": 47.0, "learning_rate": 1.1511511511511513e-05, "loss": 0.8093, "step": 5280 }, { "epoch": 47.0, "learning_rate": 1.1428094761428096e-05, "loss": 0.3749, "step": 5290 }, { "epoch": 47.01, "learning_rate": 1.1344678011344678e-05, "loss": 0.572, "step": 5300 }, { "epoch": 47.01, "learning_rate": 1.1261261261261261e-05, "loss": 0.4389, "step": 5310 }, { "epoch": 47.01, "learning_rate": 1.1177844511177845e-05, "loss": 0.421, "step": 5320 }, { "epoch": 47.01, "learning_rate": 1.1094427761094428e-05, "loss": 0.7683, "step": 5330 }, { "epoch": 47.01, "learning_rate": 1.1011011011011012e-05, "loss": 0.333, "step": 5340 }, { "epoch": 47.01, "learning_rate": 1.0927594260927594e-05, "loss": 0.8005, "step": 5350 }, { "epoch": 47.01, "learning_rate": 1.0844177510844179e-05, "loss": 0.5426, "step": 5360 }, { "epoch": 47.02, "learning_rate": 1.0760760760760762e-05, "loss": 0.4852, "step": 5370 }, { "epoch": 47.02, "eval_accuracy": 0.6589861751152074, "eval_loss": 1.2433668375015259, "eval_runtime": 303.2594, "eval_samples_per_second": 0.716, "eval_steps_per_second": 0.181, "step": 5376 }, { "epoch": 48.0, "learning_rate": 1.0677344010677344e-05, "loss": 0.9861, "step": 5380 }, { "epoch": 48.0, "learning_rate": 1.0593927260593927e-05, "loss": 0.4832, "step": 5390 }, { "epoch": 48.0, "learning_rate": 1.051051051051051e-05, "loss": 0.3998, "step": 5400 }, { "epoch": 48.01, "learning_rate": 1.0427093760427094e-05, "loss": 0.599, "step": 5410 }, { "epoch": 48.01, "learning_rate": 1.0343677010343678e-05, "loss": 0.6899, "step": 5420 }, { "epoch": 48.01, "learning_rate": 1.0260260260260261e-05, "loss": 0.618, "step": 5430 }, { "epoch": 48.01, "learning_rate": 1.0176843510176843e-05, "loss": 0.5096, "step": 5440 }, { "epoch": 48.01, "learning_rate": 1.0093426760093428e-05, "loss": 0.379, "step": 5450 }, { "epoch": 48.01, "learning_rate": 1.0010010010010011e-05, "loss": 0.5432, "step": 5460 }, { "epoch": 48.01, "learning_rate": 9.926593259926593e-06, "loss": 0.5489, "step": 5470 }, { "epoch": 48.02, "learning_rate": 9.843176509843177e-06, "loss": 0.523, "step": 5480 }, { "epoch": 48.02, "eval_accuracy": 0.6267281105990783, "eval_loss": 1.2123322486877441, "eval_runtime": 258.3979, "eval_samples_per_second": 0.84, "eval_steps_per_second": 0.213, "step": 5488 }, { "epoch": 49.0, "learning_rate": 9.75975975975976e-06, "loss": 0.2441, "step": 5490 }, { "epoch": 49.0, "learning_rate": 9.676343009676344e-06, "loss": 0.6196, "step": 5500 }, { "epoch": 49.0, "learning_rate": 9.592926259592927e-06, "loss": 0.3021, "step": 5510 }, { "epoch": 49.0, "learning_rate": 9.509509509509509e-06, "loss": 0.73, "step": 5520 }, { "epoch": 49.01, "learning_rate": 9.426092759426094e-06, "loss": 0.5437, "step": 5530 }, { "epoch": 49.01, "learning_rate": 9.342676009342677e-06, "loss": 0.3555, "step": 5540 }, { "epoch": 49.01, "learning_rate": 9.259259259259259e-06, "loss": 0.5324, "step": 5550 }, { "epoch": 49.01, "learning_rate": 9.175842509175843e-06, "loss": 0.3838, "step": 5560 }, { "epoch": 49.01, "learning_rate": 9.092425759092426e-06, "loss": 0.5131, "step": 5570 }, { "epoch": 49.01, "learning_rate": 9.00900900900901e-06, "loss": 0.5087, "step": 5580 }, { "epoch": 49.02, "learning_rate": 8.925592258925593e-06, "loss": 0.5122, "step": 5590 }, { "epoch": 49.02, "learning_rate": 8.842175508842176e-06, "loss": 0.8344, "step": 5600 }, { "epoch": 49.02, "eval_accuracy": 0.663594470046083, "eval_loss": 1.1888865232467651, "eval_runtime": 295.3851, "eval_samples_per_second": 0.735, "eval_steps_per_second": 0.186, "step": 5600 }, { "epoch": 50.0, "learning_rate": 8.758758758758758e-06, "loss": 0.6569, "step": 5610 }, { "epoch": 50.0, "learning_rate": 8.675342008675343e-06, "loss": 0.4491, "step": 5620 }, { "epoch": 50.0, "learning_rate": 8.591925258591927e-06, "loss": 0.6111, "step": 5630 }, { "epoch": 50.01, "learning_rate": 8.508508508508508e-06, "loss": 0.2911, "step": 5640 }, { "epoch": 50.01, "learning_rate": 8.425091758425092e-06, "loss": 0.6463, "step": 5650 }, { "epoch": 50.01, "learning_rate": 8.341675008341675e-06, "loss": 0.3108, "step": 5660 }, { "epoch": 50.01, "learning_rate": 8.258258258258259e-06, "loss": 0.4594, "step": 5670 }, { "epoch": 50.01, "learning_rate": 8.174841508174842e-06, "loss": 0.4389, "step": 5680 }, { "epoch": 50.01, "learning_rate": 8.091424758091424e-06, "loss": 0.4616, "step": 5690 }, { "epoch": 50.02, "learning_rate": 8.008008008008007e-06, "loss": 0.2669, "step": 5700 }, { "epoch": 50.02, "learning_rate": 7.924591257924593e-06, "loss": 0.6648, "step": 5710 }, { "epoch": 50.02, "eval_accuracy": 0.6405529953917051, "eval_loss": 1.2328165769577026, "eval_runtime": 294.1867, "eval_samples_per_second": 0.738, "eval_steps_per_second": 0.187, "step": 5712 }, { "epoch": 51.0, "learning_rate": 7.841174507841174e-06, "loss": 0.5881, "step": 5720 }, { "epoch": 51.0, "learning_rate": 7.757757757757758e-06, "loss": 0.658, "step": 5730 }, { "epoch": 51.0, "learning_rate": 7.674341007674341e-06, "loss": 0.473, "step": 5740 }, { "epoch": 51.01, "learning_rate": 7.590924257590924e-06, "loss": 0.561, "step": 5750 }, { "epoch": 51.01, "learning_rate": 7.507507507507508e-06, "loss": 0.3124, "step": 5760 }, { "epoch": 51.01, "learning_rate": 7.424090757424092e-06, "loss": 0.6171, "step": 5770 }, { "epoch": 51.01, "learning_rate": 7.340674007340674e-06, "loss": 0.2376, "step": 5780 }, { "epoch": 51.01, "learning_rate": 7.257257257257258e-06, "loss": 0.5859, "step": 5790 }, { "epoch": 51.01, "learning_rate": 7.173840507173841e-06, "loss": 0.3655, "step": 5800 }, { "epoch": 51.01, "learning_rate": 7.090423757090424e-06, "loss": 0.5225, "step": 5810 }, { "epoch": 51.02, "learning_rate": 7.007007007007008e-06, "loss": 0.6929, "step": 5820 }, { "epoch": 51.02, "eval_accuracy": 0.6129032258064516, "eval_loss": 1.3269143104553223, "eval_runtime": 258.5914, "eval_samples_per_second": 0.839, "eval_steps_per_second": 0.213, "step": 5824 }, { "epoch": 52.0, "learning_rate": 6.92359025692359e-06, "loss": 0.2531, "step": 5830 }, { "epoch": 52.0, "learning_rate": 6.840173506840174e-06, "loss": 0.6214, "step": 5840 }, { "epoch": 52.0, "learning_rate": 6.7567567567567575e-06, "loss": 0.6634, "step": 5850 }, { "epoch": 52.01, "learning_rate": 6.67334000667334e-06, "loss": 0.5057, "step": 5860 }, { "epoch": 52.01, "learning_rate": 6.589923256589924e-06, "loss": 0.4402, "step": 5870 }, { "epoch": 52.01, "learning_rate": 6.506506506506507e-06, "loss": 0.9344, "step": 5880 }, { "epoch": 52.01, "learning_rate": 6.42308975642309e-06, "loss": 0.1245, "step": 5890 }, { "epoch": 52.01, "learning_rate": 6.339673006339673e-06, "loss": 0.568, "step": 5900 }, { "epoch": 52.01, "learning_rate": 6.256256256256257e-06, "loss": 0.4524, "step": 5910 }, { "epoch": 52.01, "learning_rate": 6.172839506172839e-06, "loss": 0.4983, "step": 5920 }, { "epoch": 52.02, "learning_rate": 6.0894227560894234e-06, "loss": 0.4253, "step": 5930 }, { "epoch": 52.02, "eval_accuracy": 0.6820276497695853, "eval_loss": 1.1884889602661133, "eval_runtime": 278.2143, "eval_samples_per_second": 0.78, "eval_steps_per_second": 0.198, "step": 5936 }, { "epoch": 53.0, "learning_rate": 6.006006006006006e-06, "loss": 0.5096, "step": 5940 }, { "epoch": 53.0, "learning_rate": 5.9225892559225895e-06, "loss": 0.6105, "step": 5950 }, { "epoch": 53.0, "learning_rate": 5.839172505839173e-06, "loss": 1.0454, "step": 5960 }, { "epoch": 53.01, "learning_rate": 5.755755755755756e-06, "loss": 0.4144, "step": 5970 }, { "epoch": 53.01, "learning_rate": 5.672339005672339e-06, "loss": 0.8673, "step": 5980 }, { "epoch": 53.01, "learning_rate": 5.5889222555889224e-06, "loss": 0.4563, "step": 5990 }, { "epoch": 53.01, "learning_rate": 5.505505505505506e-06, "loss": 0.5437, "step": 6000 }, { "epoch": 53.01, "learning_rate": 5.422088755422089e-06, "loss": 0.4049, "step": 6010 }, { "epoch": 53.01, "learning_rate": 5.338672005338672e-06, "loss": 0.5225, "step": 6020 }, { "epoch": 53.01, "learning_rate": 5.255255255255255e-06, "loss": 0.4383, "step": 6030 }, { "epoch": 53.02, "learning_rate": 5.171838505171839e-06, "loss": 0.7003, "step": 6040 }, { "epoch": 53.02, "eval_accuracy": 0.7004608294930875, "eval_loss": 1.1521508693695068, "eval_runtime": 293.3646, "eval_samples_per_second": 0.74, "eval_steps_per_second": 0.187, "step": 6048 }, { "epoch": 54.0, "learning_rate": 5.0884217550884214e-06, "loss": 0.6495, "step": 6050 }, { "epoch": 54.0, "learning_rate": 5.005005005005006e-06, "loss": 0.5755, "step": 6060 }, { "epoch": 54.0, "learning_rate": 4.921588254921588e-06, "loss": 0.5131, "step": 6070 }, { "epoch": 54.0, "learning_rate": 4.838171504838172e-06, "loss": 0.5332, "step": 6080 }, { "epoch": 54.01, "learning_rate": 4.754754754754754e-06, "loss": 0.2762, "step": 6090 }, { "epoch": 54.01, "learning_rate": 4.671338004671339e-06, "loss": 0.383, "step": 6100 }, { "epoch": 54.01, "learning_rate": 4.587921254587921e-06, "loss": 0.6731, "step": 6110 }, { "epoch": 54.01, "learning_rate": 4.504504504504505e-06, "loss": 0.4703, "step": 6120 }, { "epoch": 54.01, "learning_rate": 4.421087754421088e-06, "loss": 0.3767, "step": 6130 }, { "epoch": 54.01, "learning_rate": 4.337671004337672e-06, "loss": 0.5669, "step": 6140 }, { "epoch": 54.02, "learning_rate": 4.254254254254254e-06, "loss": 0.4277, "step": 6150 }, { "epoch": 54.02, "learning_rate": 4.170837504170838e-06, "loss": 0.4105, "step": 6160 }, { "epoch": 54.02, "eval_accuracy": 0.7373271889400922, "eval_loss": 1.0037211179733276, "eval_runtime": 262.5907, "eval_samples_per_second": 0.826, "eval_steps_per_second": 0.209, "step": 6160 }, { "epoch": 55.0, "learning_rate": 4.087420754087421e-06, "loss": 0.2836, "step": 6170 }, { "epoch": 55.0, "learning_rate": 4.004004004004004e-06, "loss": 0.6498, "step": 6180 }, { "epoch": 55.0, "learning_rate": 3.920587253920587e-06, "loss": 0.5334, "step": 6190 }, { "epoch": 55.01, "learning_rate": 3.837170503837171e-06, "loss": 0.7971, "step": 6200 }, { "epoch": 55.01, "learning_rate": 3.753753753753754e-06, "loss": 0.3278, "step": 6210 }, { "epoch": 55.01, "learning_rate": 3.670337003670337e-06, "loss": 0.3237, "step": 6220 }, { "epoch": 55.01, "learning_rate": 3.5869202535869206e-06, "loss": 0.7723, "step": 6230 }, { "epoch": 55.01, "learning_rate": 3.503503503503504e-06, "loss": 0.6996, "step": 6240 }, { "epoch": 55.01, "learning_rate": 3.420086753420087e-06, "loss": 0.544, "step": 6250 }, { "epoch": 55.02, "learning_rate": 3.33667000333667e-06, "loss": 0.6693, "step": 6260 }, { "epoch": 55.02, "learning_rate": 3.2532532532532535e-06, "loss": 0.5206, "step": 6270 }, { "epoch": 55.02, "eval_accuracy": 0.7188940092165899, "eval_loss": 1.0913441181182861, "eval_runtime": 299.8505, "eval_samples_per_second": 0.724, "eval_steps_per_second": 0.183, "step": 6272 }, { "epoch": 56.0, "learning_rate": 3.1698365031698365e-06, "loss": 0.2842, "step": 6280 }, { "epoch": 56.0, "learning_rate": 3.0864197530864196e-06, "loss": 0.4048, "step": 6290 }, { "epoch": 56.0, "learning_rate": 3.003003003003003e-06, "loss": 0.6278, "step": 6300 }, { "epoch": 56.01, "learning_rate": 2.9195862529195865e-06, "loss": 0.4073, "step": 6310 }, { "epoch": 56.01, "learning_rate": 2.8361695028361695e-06, "loss": 0.7256, "step": 6320 }, { "epoch": 56.01, "learning_rate": 2.752752752752753e-06, "loss": 0.5157, "step": 6330 }, { "epoch": 56.01, "learning_rate": 2.669336002669336e-06, "loss": 0.205, "step": 6340 }, { "epoch": 56.01, "learning_rate": 2.5859192525859194e-06, "loss": 0.513, "step": 6350 }, { "epoch": 56.01, "learning_rate": 2.502502502502503e-06, "loss": 0.3176, "step": 6360 }, { "epoch": 56.01, "learning_rate": 2.419085752419086e-06, "loss": 0.4901, "step": 6370 }, { "epoch": 56.02, "learning_rate": 2.3356690023356693e-06, "loss": 0.7129, "step": 6380 }, { "epoch": 56.02, "eval_accuracy": 0.6866359447004609, "eval_loss": 1.108277678489685, "eval_runtime": 292.9571, "eval_samples_per_second": 0.741, "eval_steps_per_second": 0.188, "step": 6384 }, { "epoch": 57.0, "learning_rate": 2.2522522522522524e-06, "loss": 0.2624, "step": 6390 }, { "epoch": 57.0, "learning_rate": 2.168835502168836e-06, "loss": 0.0914, "step": 6400 }, { "epoch": 57.0, "learning_rate": 2.085418752085419e-06, "loss": 0.684, "step": 6410 }, { "epoch": 57.01, "learning_rate": 2.002002002002002e-06, "loss": 0.3441, "step": 6420 }, { "epoch": 57.01, "learning_rate": 1.9185852519185853e-06, "loss": 0.4939, "step": 6430 }, { "epoch": 57.01, "learning_rate": 1.8351685018351686e-06, "loss": 0.1774, "step": 6440 }, { "epoch": 57.01, "learning_rate": 1.751751751751752e-06, "loss": 0.5289, "step": 6450 }, { "epoch": 57.01, "learning_rate": 1.668335001668335e-06, "loss": 0.9437, "step": 6460 }, { "epoch": 57.01, "learning_rate": 1.5849182515849183e-06, "loss": 0.7129, "step": 6470 }, { "epoch": 57.01, "learning_rate": 1.5015015015015015e-06, "loss": 0.1014, "step": 6480 }, { "epoch": 57.02, "learning_rate": 1.4180847514180847e-06, "loss": 0.4772, "step": 6490 }, { "epoch": 57.02, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.1276459693908691, "eval_runtime": 263.52, "eval_samples_per_second": 0.823, "eval_steps_per_second": 0.209, "step": 6496 }, { "epoch": 58.0, "learning_rate": 1.334668001334668e-06, "loss": 0.5678, "step": 6500 }, { "epoch": 58.0, "learning_rate": 1.2512512512512514e-06, "loss": 0.2869, "step": 6510 }, { "epoch": 58.0, "learning_rate": 1.1678345011678347e-06, "loss": 0.9173, "step": 6520 }, { "epoch": 58.01, "learning_rate": 1.084417751084418e-06, "loss": 0.5002, "step": 6530 }, { "epoch": 58.01, "learning_rate": 1.001001001001001e-06, "loss": 0.4222, "step": 6540 }, { "epoch": 58.01, "learning_rate": 9.175842509175843e-07, "loss": 0.2793, "step": 6550 }, { "epoch": 58.01, "learning_rate": 8.341675008341675e-07, "loss": 0.2781, "step": 6560 }, { "epoch": 58.01, "learning_rate": 7.507507507507508e-07, "loss": 0.4779, "step": 6570 }, { "epoch": 58.01, "learning_rate": 6.67334000667334e-07, "loss": 0.4709, "step": 6580 }, { "epoch": 58.01, "learning_rate": 5.839172505839173e-07, "loss": 0.5879, "step": 6590 }, { "epoch": 58.02, "learning_rate": 5.005005005005005e-07, "loss": 0.4822, "step": 6600 }, { "epoch": 58.02, "eval_accuracy": 0.7235023041474654, "eval_loss": 1.0919580459594727, "eval_runtime": 304.8665, "eval_samples_per_second": 0.712, "eval_steps_per_second": 0.18, "step": 6608 }, { "epoch": 59.0, "learning_rate": 4.1708375041708376e-07, "loss": 0.601, "step": 6610 }, { "epoch": 59.0, "learning_rate": 3.33667000333667e-07, "loss": 0.5131, "step": 6620 }, { "epoch": 59.0, "learning_rate": 2.5025025025025023e-07, "loss": 0.175, "step": 6630 }, { "epoch": 59.0, "learning_rate": 1.668335001668335e-07, "loss": 0.4028, "step": 6640 }, { "epoch": 59.01, "learning_rate": 8.341675008341675e-08, "loss": 0.5526, "step": 6650 }, { "epoch": 59.01, "learning_rate": 0.0, "loss": 0.6307, "step": 6660 }, { "epoch": 59.01, "eval_accuracy": 0.7188940092165899, "eval_loss": 1.098652958869934, "eval_runtime": 289.552, "eval_samples_per_second": 0.749, "eval_steps_per_second": 0.19, "step": 6660 }, { "epoch": 59.01, "step": 6660, "total_flos": 3.3122529003141366e+19, "train_loss": 0.7877646436920395, "train_runtime": 57566.1475, "train_samples_per_second": 0.463, "train_steps_per_second": 0.116 }, { "epoch": 59.01, "eval_accuracy": 0.6296296296296297, "eval_loss": 0.8929917812347412, "eval_runtime": 260.8949, "eval_samples_per_second": 0.828, "eval_steps_per_second": 0.207, "step": 6660 }, { "epoch": 59.01, "eval_accuracy": 0.6296296296296297, "eval_loss": 0.8929917812347412, "eval_runtime": 274.034, "eval_samples_per_second": 0.788, "eval_steps_per_second": 0.197, "step": 6660 } ], "logging_steps": 10, "max_steps": 6660, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 3.3122529003141366e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }