diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,4817 +1,4583 @@ { - "best_metric": 0.8525345622119815, - "best_model_checkpoint": "videomae-base-finetuned-subset\\checkpoint-5900", - "epoch": 59.00826210826211, + "best_metric": 0.7649769585253456, + "best_model_checkpoint": "videomae-base-finetuned-subset\\checkpoint-2016", + "epoch": 59.00780780780781, "eval_steps": 500, - "global_step": 7020, + "global_step": 6660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 7.122507122507123e-07, - "loss": 1.1155, + "learning_rate": 7.507507507507508e-07, + "loss": 1.7044, "step": 10 }, { "epoch": 0.0, - "learning_rate": 1.4245014245014246e-06, - "loss": 1.1405, + "learning_rate": 1.5015015015015015e-06, + "loss": 1.6581, "step": 20 }, { "epoch": 0.0, - "learning_rate": 2.136752136752137e-06, - "loss": 0.5289, + "learning_rate": 2.2522522522522524e-06, + "loss": 1.5572, "step": 30 }, { "epoch": 0.01, - "learning_rate": 2.8490028490028492e-06, - "loss": 0.8832, + "learning_rate": 3.003003003003003e-06, + "loss": 1.6749, "step": 40 }, { "epoch": 0.01, - "learning_rate": 3.5612535612535615e-06, - "loss": 1.0172, + "learning_rate": 3.753753753753754e-06, + "loss": 1.6511, "step": 50 }, { "epoch": 0.01, - "learning_rate": 4.273504273504274e-06, - "loss": 0.7363, + "learning_rate": 4.504504504504505e-06, + "loss": 1.6406, "step": 60 }, { "epoch": 0.01, - "learning_rate": 4.985754985754986e-06, - "loss": 0.8444, + "learning_rate": 5.255255255255255e-06, + "loss": 1.6205, "step": 70 }, { "epoch": 0.01, - "learning_rate": 5.6980056980056985e-06, - "loss": 0.8984, + "learning_rate": 6.006006006006006e-06, + "loss": 1.707, "step": 80 }, { "epoch": 0.01, - "learning_rate": 6.41025641025641e-06, - "loss": 1.085, + "learning_rate": 6.7567567567567575e-06, + "loss": 1.5589, "step": 90 }, { - "epoch": 0.01, - "learning_rate": 7.122507122507123e-06, - "loss": 0.7478, + "epoch": 0.02, + "learning_rate": 7.507507507507508e-06, + "loss": 1.6865, "step": 100 }, { "epoch": 0.02, - "learning_rate": 7.834757834757835e-06, - "loss": 0.9225, + "learning_rate": 8.258258258258259e-06, + "loss": 1.6074, "step": 110 }, { "epoch": 0.02, - "eval_accuracy": 0.7419354838709677, - "eval_loss": 0.7362823486328125, - "eval_runtime": 289.676, - "eval_samples_per_second": 0.749, - "eval_steps_per_second": 0.19, - "step": 118 + "eval_accuracy": 0.3686635944700461, + "eval_loss": 1.5690008401870728, + "eval_runtime": 253.0884, + "eval_samples_per_second": 0.857, + "eval_steps_per_second": 0.217, + "step": 112 }, { "epoch": 1.0, - "learning_rate": 8.547008547008548e-06, - "loss": 0.8071, + "learning_rate": 9.00900900900901e-06, + "loss": 1.5636, "step": 120 }, { "epoch": 1.0, - "learning_rate": 9.259259259259259e-06, - "loss": 0.4488, + "learning_rate": 9.75975975975976e-06, + "loss": 1.6139, "step": 130 }, { "epoch": 1.0, - "learning_rate": 9.971509971509972e-06, - "loss": 1.23, + "learning_rate": 1.051051051051051e-05, + "loss": 1.591, "step": 140 }, { - "epoch": 1.0, - "learning_rate": 1.0683760683760684e-05, - "loss": 0.7655, + "epoch": 1.01, + "learning_rate": 1.1261261261261261e-05, + "loss": 1.5824, "step": 150 }, { "epoch": 1.01, - "learning_rate": 1.1396011396011397e-05, - "loss": 0.8764, + "learning_rate": 1.2012012012012012e-05, + "loss": 1.6132, "step": 160 }, { "epoch": 1.01, - "learning_rate": 1.2108262108262108e-05, - "loss": 0.5023, + "learning_rate": 1.2762762762762764e-05, + "loss": 1.6279, "step": 170 }, { "epoch": 1.01, - "learning_rate": 1.282051282051282e-05, - "loss": 0.5699, + "learning_rate": 1.3513513513513515e-05, + "loss": 1.6001, "step": 180 }, { "epoch": 1.01, - "learning_rate": 1.3532763532763535e-05, - "loss": 0.8942, + "learning_rate": 1.4264264264264266e-05, + "loss": 1.5873, "step": 190 }, { "epoch": 1.01, - "learning_rate": 1.4245014245014246e-05, - "loss": 0.7764, + "learning_rate": 1.5015015015015016e-05, + "loss": 1.59, "step": 200 }, { "epoch": 1.01, - "learning_rate": 1.4957264957264958e-05, - "loss": 0.8046, + "learning_rate": 1.5765765765765765e-05, + "loss": 1.5691, "step": 210 }, { - "epoch": 1.01, - "learning_rate": 1.566951566951567e-05, - "loss": 0.5461, + "epoch": 1.02, + "learning_rate": 1.6516516516516518e-05, + "loss": 1.6001, "step": 220 }, { "epoch": 1.02, - "learning_rate": 1.6381766381766382e-05, - "loss": 0.8357, - "step": 230 + "eval_accuracy": 0.30414746543778803, + "eval_loss": 1.5782501697540283, + "eval_runtime": 245.0114, + "eval_samples_per_second": 0.886, + "eval_steps_per_second": 0.224, + "step": 224 }, { - "epoch": 1.02, - "eval_accuracy": 0.7004608294930875, - "eval_loss": 0.911893367767334, - "eval_runtime": 306.0217, - "eval_samples_per_second": 0.709, - "eval_steps_per_second": 0.18, - "step": 236 + "epoch": 2.0, + "learning_rate": 1.7267267267267267e-05, + "loss": 1.5771, + "step": 230 }, { "epoch": 2.0, - "learning_rate": 1.7094017094017095e-05, - "loss": 0.575, + "learning_rate": 1.801801801801802e-05, + "loss": 1.5651, "step": 240 }, { "epoch": 2.0, - "learning_rate": 1.7806267806267805e-05, - "loss": 0.7452, + "learning_rate": 1.8768768768768768e-05, + "loss": 1.4911, "step": 250 }, { - "epoch": 2.0, - "learning_rate": 1.8518518518518518e-05, - "loss": 0.7453, + "epoch": 2.01, + "learning_rate": 1.951951951951952e-05, + "loss": 1.6414, "step": 260 }, { - "epoch": 2.0, - "learning_rate": 1.923076923076923e-05, - "loss": 0.5861, + "epoch": 2.01, + "learning_rate": 2.0270270270270273e-05, + "loss": 1.5454, "step": 270 }, { "epoch": 2.01, - "learning_rate": 1.9943019943019945e-05, - "loss": 0.4319, + "learning_rate": 2.102102102102102e-05, + "loss": 1.4563, "step": 280 }, { "epoch": 2.01, - "learning_rate": 2.0655270655270654e-05, - "loss": 0.532, + "learning_rate": 2.1771771771771774e-05, + "loss": 1.4325, "step": 290 }, { "epoch": 2.01, - "learning_rate": 2.1367521367521368e-05, - "loss": 0.8728, + "learning_rate": 2.2522522522522523e-05, + "loss": 1.4738, "step": 300 }, { "epoch": 2.01, - "learning_rate": 2.207977207977208e-05, - "loss": 1.1116, + "learning_rate": 2.3273273273273275e-05, + "loss": 1.3045, "step": 310 }, { "epoch": 2.01, - "learning_rate": 2.2792022792022794e-05, - "loss": 0.9775, + "learning_rate": 2.4024024024024024e-05, + "loss": 1.2273, "step": 320 }, { - "epoch": 2.01, - "learning_rate": 2.3504273504273504e-05, - "loss": 1.1248, + "epoch": 2.02, + "learning_rate": 2.4774774774774777e-05, + "loss": 1.4193, "step": 330 }, { - "epoch": 2.01, - "learning_rate": 2.4216524216524217e-05, - "loss": 0.7516, - "step": 340 + "epoch": 2.02, + "eval_accuracy": 0.3824884792626728, + "eval_loss": 1.4873727560043335, + "eval_runtime": 251.8516, + "eval_samples_per_second": 0.862, + "eval_steps_per_second": 0.218, + "step": 336 }, { - "epoch": 2.02, - "learning_rate": 2.492877492877493e-05, - "loss": 0.474, - "step": 350 + "epoch": 3.0, + "learning_rate": 2.552552552552553e-05, + "loss": 1.5274, + "step": 340 }, { - "epoch": 2.02, - "eval_accuracy": 0.6820276497695853, - "eval_loss": 0.9698442220687866, - "eval_runtime": 313.4651, - "eval_samples_per_second": 0.692, - "eval_steps_per_second": 0.175, - "step": 354 + "epoch": 3.0, + "learning_rate": 2.6276276276276278e-05, + "loss": 1.3287, + "step": 350 }, { "epoch": 3.0, - "learning_rate": 2.564102564102564e-05, - "loss": 0.999, + "learning_rate": 2.702702702702703e-05, + "loss": 1.4428, "step": 360 }, { - "epoch": 3.0, - "learning_rate": 2.6353276353276356e-05, - "loss": 0.758, + "epoch": 3.01, + "learning_rate": 2.777777777777778e-05, + "loss": 1.2924, "step": 370 }, { - "epoch": 3.0, - "learning_rate": 2.706552706552707e-05, - "loss": 0.6963, + "epoch": 3.01, + "learning_rate": 2.852852852852853e-05, + "loss": 1.204, "step": 380 }, { "epoch": 3.01, - "learning_rate": 2.777777777777778e-05, - "loss": 0.5529, + "learning_rate": 2.927927927927928e-05, + "loss": 1.1626, "step": 390 }, { "epoch": 3.01, - "learning_rate": 2.8490028490028492e-05, - "loss": 0.7653, + "learning_rate": 3.0030030030030033e-05, + "loss": 1.5428, "step": 400 }, { "epoch": 3.01, - "learning_rate": 2.9202279202279202e-05, - "loss": 0.8444, + "learning_rate": 3.078078078078078e-05, + "loss": 1.1494, "step": 410 }, { "epoch": 3.01, - "learning_rate": 2.9914529914529915e-05, - "loss": 0.6609, + "learning_rate": 3.153153153153153e-05, + "loss": 1.2899, "step": 420 }, { "epoch": 3.01, - "learning_rate": 3.0626780626780625e-05, - "loss": 0.5681, + "learning_rate": 3.2282282282282286e-05, + "loss": 1.4876, "step": 430 }, { - "epoch": 3.01, - "learning_rate": 3.133903133903134e-05, - "loss": 0.7641, + "epoch": 3.02, + "learning_rate": 3.3033033033033035e-05, + "loss": 1.398, "step": 440 }, { - "epoch": 3.01, - "learning_rate": 3.205128205128206e-05, - "loss": 0.7485, + "epoch": 3.02, + "eval_accuracy": 0.6405529953917051, + "eval_loss": 1.0196824073791504, + "eval_runtime": 247.8505, + "eval_samples_per_second": 0.876, + "eval_steps_per_second": 0.222, + "step": 448 + }, + { + "epoch": 4.0, + "learning_rate": 3.3783783783783784e-05, + "loss": 1.3202, "step": 450 }, { - "epoch": 3.02, - "learning_rate": 3.2763532763532764e-05, - "loss": 0.7458, + "epoch": 4.0, + "learning_rate": 3.453453453453453e-05, + "loss": 1.145, "step": 460 }, { - "epoch": 3.02, - "learning_rate": 3.347578347578348e-05, - "loss": 0.7899, + "epoch": 4.0, + "learning_rate": 3.528528528528528e-05, + "loss": 1.2636, "step": 470 }, - { - "epoch": 3.02, - "eval_accuracy": 0.6774193548387096, - "eval_loss": 1.135077714920044, - "eval_runtime": 285.9321, - "eval_samples_per_second": 0.759, - "eval_steps_per_second": 0.192, - "step": 472 - }, { "epoch": 4.0, - "learning_rate": 3.418803418803419e-05, - "loss": 0.4874, + "learning_rate": 3.603603603603604e-05, + "loss": 1.2266, "step": 480 }, { - "epoch": 4.0, - "learning_rate": 3.4900284900284904e-05, - "loss": 0.6289, + "epoch": 4.01, + "learning_rate": 3.678678678678679e-05, + "loss": 1.6096, "step": 490 }, { - "epoch": 4.0, - "learning_rate": 3.561253561253561e-05, - "loss": 0.6611, + "epoch": 4.01, + "learning_rate": 3.7537537537537536e-05, + "loss": 1.1474, "step": 500 }, { "epoch": 4.01, - "learning_rate": 3.6324786324786323e-05, - "loss": 0.7506, + "learning_rate": 3.8288288288288285e-05, + "loss": 1.1879, "step": 510 }, { "epoch": 4.01, - "learning_rate": 3.7037037037037037e-05, - "loss": 0.777, + "learning_rate": 3.903903903903904e-05, + "loss": 1.0165, "step": 520 }, { "epoch": 4.01, - "learning_rate": 3.774928774928775e-05, - "loss": 0.9948, + "learning_rate": 3.9789789789789796e-05, + "loss": 1.2291, "step": 530 }, { "epoch": 4.01, - "learning_rate": 3.846153846153846e-05, - "loss": 0.704, + "learning_rate": 4.0540540540540545e-05, + "loss": 1.2309, "step": 540 }, { - "epoch": 4.01, - "learning_rate": 3.9173789173789176e-05, - "loss": 0.8743, + "epoch": 4.02, + "learning_rate": 4.1291291291291294e-05, + "loss": 1.0638, "step": 550 }, { - "epoch": 4.01, - "learning_rate": 3.988603988603989e-05, - "loss": 0.6572, + "epoch": 4.02, + "learning_rate": 4.204204204204204e-05, + "loss": 1.2217, "step": 560 }, { - "epoch": 4.01, - "learning_rate": 4.05982905982906e-05, - "loss": 1.0614, - "step": 570 + "epoch": 4.02, + "eval_accuracy": 0.391705069124424, + "eval_loss": 1.3385610580444336, + "eval_runtime": 242.8726, + "eval_samples_per_second": 0.893, + "eval_steps_per_second": 0.226, + "step": 560 }, { - "epoch": 4.02, - "learning_rate": 4.131054131054131e-05, - "loss": 0.6374, - "step": 580 + "epoch": 5.0, + "learning_rate": 4.27927927927928e-05, + "loss": 1.4069, + "step": 570 }, { - "epoch": 4.02, - "learning_rate": 4.202279202279202e-05, - "loss": 0.9015, - "step": 590 + "epoch": 5.0, + "learning_rate": 4.354354354354355e-05, + "loss": 1.26, + "step": 580 }, { - "epoch": 4.02, - "eval_accuracy": 0.4976958525345622, - "eval_loss": 1.3822749853134155, - "eval_runtime": 285.2486, - "eval_samples_per_second": 0.761, - "eval_steps_per_second": 0.193, + "epoch": 5.0, + "learning_rate": 4.42942942942943e-05, + "loss": 1.2424, "step": 590 }, { - "epoch": 5.0, - "learning_rate": 4.2735042735042735e-05, - "loss": 1.1476, + "epoch": 5.01, + "learning_rate": 4.5045045045045046e-05, + "loss": 1.2978, "step": 600 }, { - "epoch": 5.0, - "learning_rate": 4.344729344729345e-05, - "loss": 0.7482, + "epoch": 5.01, + "learning_rate": 4.57957957957958e-05, + "loss": 1.373, "step": 610 }, { - "epoch": 5.0, - "learning_rate": 4.415954415954416e-05, - "loss": 0.835, + "epoch": 5.01, + "learning_rate": 4.654654654654655e-05, + "loss": 1.2995, "step": 620 }, { "epoch": 5.01, - "learning_rate": 4.4871794871794874e-05, - "loss": 0.5413, + "learning_rate": 4.72972972972973e-05, + "loss": 1.053, "step": 630 }, { "epoch": 5.01, - "learning_rate": 4.558404558404559e-05, - "loss": 0.9714, + "learning_rate": 4.804804804804805e-05, + "loss": 1.1977, "step": 640 }, { "epoch": 5.01, - "learning_rate": 4.62962962962963e-05, - "loss": 0.4952, + "learning_rate": 4.87987987987988e-05, + "loss": 1.2698, "step": 650 }, { - "epoch": 5.01, - "learning_rate": 4.700854700854701e-05, - "loss": 1.0349, + "epoch": 5.02, + "learning_rate": 4.954954954954955e-05, + "loss": 1.1812, "step": 660 }, - { - "epoch": 5.01, - "learning_rate": 4.772079772079772e-05, - "loss": 0.8882, - "step": 670 - }, - { - "epoch": 5.01, - "learning_rate": 4.8433048433048433e-05, - "loss": 0.9244, - "step": 680 - }, - { - "epoch": 5.01, - "learning_rate": 4.9145299145299147e-05, - "loss": 1.1217, - "step": 690 - }, { "epoch": 5.02, - "learning_rate": 4.985754985754986e-05, - "loss": 0.7402, - "step": 700 + "learning_rate": 4.9966633299966636e-05, + "loss": 1.2577, + "step": 670 }, { "epoch": 5.02, - "eval_accuracy": 0.695852534562212, - "eval_loss": 0.8660895824432373, - "eval_runtime": 289.3403, - "eval_samples_per_second": 0.75, - "eval_steps_per_second": 0.19, - "step": 708 - }, - { - "epoch": 6.0, - "learning_rate": 4.993668882557772e-05, - "loss": 0.6064, - "step": 710 + "eval_accuracy": 0.5391705069124424, + "eval_loss": 1.2195504903793335, + "eval_runtime": 264.2835, + "eval_samples_per_second": 0.821, + "eval_steps_per_second": 0.208, + "step": 672 }, { "epoch": 6.0, - "learning_rate": 4.985754985754986e-05, - "loss": 0.6741, - "step": 720 + "learning_rate": 4.988321654988322e-05, + "loss": 1.4038, + "step": 680 }, { "epoch": 6.0, - "learning_rate": 4.9778410889522e-05, - "loss": 0.7149, - "step": 730 + "learning_rate": 4.97997997997998e-05, + "loss": 1.4382, + "step": 690 }, { "epoch": 6.0, - "learning_rate": 4.9699271921494144e-05, - "loss": 0.9701, - "step": 740 + "learning_rate": 4.9716383049716386e-05, + "loss": 1.3709, + "step": 700 }, { "epoch": 6.01, - "learning_rate": 4.962013295346629e-05, - "loss": 0.6009, - "step": 750 + "learning_rate": 4.963296629963297e-05, + "loss": 1.0241, + "step": 710 }, { "epoch": 6.01, - "learning_rate": 4.9540993985438435e-05, - "loss": 1.3047, - "step": 760 + "learning_rate": 4.954954954954955e-05, + "loss": 1.5423, + "step": 720 }, { "epoch": 6.01, - "learning_rate": 4.946185501741058e-05, - "loss": 0.7303, - "step": 770 + "learning_rate": 4.9466132799466136e-05, + "loss": 1.2711, + "step": 730 }, { "epoch": 6.01, "learning_rate": 4.938271604938271e-05, - "loss": 0.7054, - "step": 780 + "loss": 1.4259, + "step": 740 }, { "epoch": 6.01, - "learning_rate": 4.930357708135486e-05, - "loss": 0.884, - "step": 790 + "learning_rate": 4.92992992992993e-05, + "loss": 1.2684, + "step": 750 }, { "epoch": 6.01, - "learning_rate": 4.9224438113327004e-05, - "loss": 0.7226, - "step": 800 + "learning_rate": 4.921588254921589e-05, + "loss": 0.9547, + "step": 760 }, { "epoch": 6.01, - "learning_rate": 4.9145299145299147e-05, - "loss": 0.8143, - "step": 810 + "learning_rate": 4.9132465799132463e-05, + "loss": 1.1885, + "step": 770 }, { "epoch": 6.02, - "learning_rate": 4.906616017727129e-05, - "loss": 0.6343, - "step": 820 + "learning_rate": 4.9049049049049054e-05, + "loss": 1.0121, + "step": 780 }, { "epoch": 6.02, - "eval_accuracy": 0.7004608294930875, - "eval_loss": 0.668888509273529, - "eval_runtime": 284.8469, - "eval_samples_per_second": 0.762, - "eval_steps_per_second": 0.193, - "step": 826 - }, - { - "epoch": 7.0, - "learning_rate": 4.898702120924343e-05, - "loss": 0.8134, - "step": 830 + "eval_accuracy": 0.4792626728110599, + "eval_loss": 1.2319154739379883, + "eval_runtime": 261.8591, + "eval_samples_per_second": 0.829, + "eval_steps_per_second": 0.21, + "step": 784 }, { "epoch": 7.0, - "learning_rate": 4.890788224121557e-05, - "loss": 0.664, - "step": 840 + "learning_rate": 4.896563229896564e-05, + "loss": 1.355, + "step": 790 }, { "epoch": 7.0, - "learning_rate": 4.882874327318772e-05, - "loss": 0.7448, - "step": 850 + "learning_rate": 4.8882215548882214e-05, + "loss": 1.4279, + "step": 800 }, { "epoch": 7.0, - "learning_rate": 4.8749604305159865e-05, - "loss": 0.6677, - "step": 860 + "learning_rate": 4.87987987987988e-05, + "loss": 1.0318, + "step": 810 }, { "epoch": 7.01, - "learning_rate": 4.867046533713201e-05, - "loss": 1.0314, - "step": 870 + "learning_rate": 4.871538204871539e-05, + "loss": 1.1375, + "step": 820 }, { "epoch": 7.01, - "learning_rate": 4.859132636910415e-05, - "loss": 0.6049, - "step": 880 + "learning_rate": 4.8631965298631964e-05, + "loss": 1.1721, + "step": 830 }, { "epoch": 7.01, - "learning_rate": 4.851218740107629e-05, - "loss": 1.3879, - "step": 890 + "learning_rate": 4.854854854854855e-05, + "loss": 1.2527, + "step": 840 }, { "epoch": 7.01, - "learning_rate": 4.8433048433048433e-05, - "loss": 0.7445, - "step": 900 + "learning_rate": 4.846513179846514e-05, + "loss": 1.0756, + "step": 850 }, { "epoch": 7.01, - "learning_rate": 4.835390946502058e-05, - "loss": 0.7087, - "step": 910 + "learning_rate": 4.8381715048381715e-05, + "loss": 1.1513, + "step": 860 }, { "epoch": 7.01, - "learning_rate": 4.8274770496992725e-05, - "loss": 0.5025, - "step": 920 + "learning_rate": 4.82982982982983e-05, + "loss": 1.0789, + "step": 870 }, { "epoch": 7.01, - "learning_rate": 4.819563152896486e-05, - "loss": 0.6759, - "step": 930 + "learning_rate": 4.821488154821489e-05, + "loss": 1.2463, + "step": 880 }, { "epoch": 7.02, - "learning_rate": 4.811649256093701e-05, - "loss": 0.7427, - "step": 940 + "learning_rate": 4.8131464798131465e-05, + "loss": 1.2485, + "step": 890 }, { "epoch": 7.02, - "eval_accuracy": 0.6728110599078341, - "eval_loss": 0.9108946919441223, - "eval_runtime": 320.8735, - "eval_samples_per_second": 0.676, - "eval_steps_per_second": 0.171, - "step": 944 + "eval_accuracy": 0.7511520737327189, + "eval_loss": 0.8229795098304749, + "eval_runtime": 252.4438, + "eval_samples_per_second": 0.86, + "eval_steps_per_second": 0.218, + "step": 896 }, { "epoch": 8.0, - "learning_rate": 4.803735359290915e-05, - "loss": 0.9541, - "step": 950 + "learning_rate": 4.804804804804805e-05, + "loss": 0.9738, + "step": 900 }, { "epoch": 8.0, - "learning_rate": 4.7958214624881294e-05, - "loss": 0.9822, - "step": 960 + "learning_rate": 4.796463129796463e-05, + "loss": 0.7946, + "step": 910 }, { "epoch": 8.0, - "learning_rate": 4.787907565685344e-05, - "loss": 0.9233, - "step": 970 + "learning_rate": 4.7881214547881215e-05, + "loss": 1.2819, + "step": 920 }, { "epoch": 8.01, - "learning_rate": 4.779993668882558e-05, - "loss": 0.9739, - "step": 980 + "learning_rate": 4.77977977977978e-05, + "loss": 1.7354, + "step": 930 }, { "epoch": 8.01, - "learning_rate": 4.772079772079772e-05, - "loss": 0.966, - "step": 990 + "learning_rate": 4.771438104771438e-05, + "loss": 1.161, + "step": 940 }, { "epoch": 8.01, - "learning_rate": 4.764165875276987e-05, - "loss": 0.8516, - "step": 1000 + "learning_rate": 4.7630964297630966e-05, + "loss": 1.3311, + "step": 950 }, { "epoch": 8.01, - "learning_rate": 4.756251978474201e-05, - "loss": 0.6187, - "step": 1010 + "learning_rate": 4.754754754754755e-05, + "loss": 0.9299, + "step": 960 }, { "epoch": 8.01, - "learning_rate": 4.7483380816714154e-05, - "loss": 1.2194, - "step": 1020 + "learning_rate": 4.746413079746413e-05, + "loss": 0.9622, + "step": 970 }, { "epoch": 8.01, - "learning_rate": 4.7404241848686296e-05, - "loss": 1.1313, - "step": 1030 + "learning_rate": 4.7380714047380716e-05, + "loss": 0.9557, + "step": 980 }, { "epoch": 8.01, - "learning_rate": 4.732510288065844e-05, - "loss": 0.7851, - "step": 1040 - }, - { - "epoch": 8.02, - "learning_rate": 4.724596391263058e-05, - "loss": 0.8549, - "step": 1050 + "learning_rate": 4.72972972972973e-05, + "loss": 0.9241, + "step": 990 }, { "epoch": 8.02, - "learning_rate": 4.716682494460272e-05, - "loss": 0.5898, - "step": 1060 + "learning_rate": 4.721388054721388e-05, + "loss": 1.025, + "step": 1000 }, { "epoch": 8.02, - "eval_accuracy": 0.5944700460829493, - "eval_loss": 1.0126854181289673, - "eval_runtime": 324.9051, - "eval_samples_per_second": 0.668, - "eval_steps_per_second": 0.169, - "step": 1062 + "eval_accuracy": 0.6866359447004609, + "eval_loss": 0.8022767305374146, + "eval_runtime": 256.9331, + "eval_samples_per_second": 0.845, + "eval_steps_per_second": 0.214, + "step": 1008 }, { "epoch": 9.0, - "learning_rate": 4.708768597657487e-05, - "loss": 0.4566, - "step": 1070 + "learning_rate": 4.7130463797130466e-05, + "loss": 1.6034, + "step": 1010 }, { "epoch": 9.0, - "learning_rate": 4.700854700854701e-05, - "loss": 0.9513, - "step": 1080 + "learning_rate": 4.704704704704705e-05, + "loss": 1.0115, + "step": 1020 }, { "epoch": 9.0, - "learning_rate": 4.692940804051915e-05, - "loss": 0.3735, - "step": 1090 + "learning_rate": 4.696363029696363e-05, + "loss": 1.337, + "step": 1030 }, { - "epoch": 9.01, - "learning_rate": 4.68502690724913e-05, - "loss": 0.7034, - "step": 1100 + "epoch": 9.0, + "learning_rate": 4.688021354688022e-05, + "loss": 1.351, + "step": 1040 }, { "epoch": 9.01, - "learning_rate": 4.677113010446344e-05, - "loss": 0.6437, - "step": 1110 + "learning_rate": 4.67967967967968e-05, + "loss": 1.0992, + "step": 1050 }, { "epoch": 9.01, - "learning_rate": 4.669199113643558e-05, - "loss": 0.8746, - "step": 1120 + "learning_rate": 4.6713380046713384e-05, + "loss": 1.0828, + "step": 1060 }, { "epoch": 9.01, - "learning_rate": 4.6612852168407725e-05, - "loss": 0.6746, - "step": 1130 + "learning_rate": 4.662996329662997e-05, + "loss": 1.0086, + "step": 1070 }, { "epoch": 9.01, - "learning_rate": 4.653371320037987e-05, - "loss": 0.9281, - "step": 1140 + "learning_rate": 4.654654654654655e-05, + "loss": 0.8658, + "step": 1080 }, { "epoch": 9.01, - "learning_rate": 4.645457423235201e-05, - "loss": 0.9406, - "step": 1150 + "learning_rate": 4.6463129796463134e-05, + "loss": 0.7355, + "step": 1090 }, { "epoch": 9.01, - "learning_rate": 4.637543526432416e-05, - "loss": 1.0328, - "step": 1160 + "learning_rate": 4.637971304637971e-05, + "loss": 1.0191, + "step": 1100 }, { "epoch": 9.02, "learning_rate": 4.62962962962963e-05, - "loss": 0.8338, - "step": 1170 + "loss": 1.0171, + "step": 1110 }, { "epoch": 9.02, - "learning_rate": 4.621715732826844e-05, - "loss": 0.6258, - "step": 1180 + "learning_rate": 4.6212879546212884e-05, + "loss": 1.2952, + "step": 1120 }, { "epoch": 9.02, - "eval_accuracy": 0.7235023041474654, - "eval_loss": 0.7130948305130005, - "eval_runtime": 315.7938, - "eval_samples_per_second": 0.687, - "eval_steps_per_second": 0.174, - "step": 1180 + "eval_accuracy": 0.6036866359447005, + "eval_loss": 0.9130178093910217, + "eval_runtime": 256.4943, + "eval_samples_per_second": 0.846, + "eval_steps_per_second": 0.214, + "step": 1120 }, { "epoch": 10.0, - "learning_rate": 4.6138018360240585e-05, - "loss": 0.635, - "step": 1190 + "learning_rate": 4.612946279612946e-05, + "loss": 0.8975, + "step": 1130 }, { "epoch": 10.0, - "learning_rate": 4.605887939221273e-05, - "loss": 0.8311, - "step": 1200 + "learning_rate": 4.604604604604605e-05, + "loss": 1.0215, + "step": 1140 }, { "epoch": 10.0, - "learning_rate": 4.597974042418487e-05, - "loss": 1.2343, - "step": 1210 + "learning_rate": 4.5962629295962635e-05, + "loss": 0.9562, + "step": 1150 }, { "epoch": 10.01, - "learning_rate": 4.590060145615702e-05, - "loss": 0.8252, - "step": 1220 + "learning_rate": 4.587921254587921e-05, + "loss": 1.2006, + "step": 1160 }, { "epoch": 10.01, - "learning_rate": 4.582146248812916e-05, - "loss": 0.7481, - "step": 1230 + "learning_rate": 4.57957957957958e-05, + "loss": 1.2811, + "step": 1170 }, { "epoch": 10.01, - "learning_rate": 4.5742323520101296e-05, - "loss": 0.4203, - "step": 1240 + "learning_rate": 4.5712379045712385e-05, + "loss": 1.0057, + "step": 1180 }, { "epoch": 10.01, - "learning_rate": 4.5663184552073445e-05, - "loss": 0.5636, - "step": 1250 + "learning_rate": 4.562896229562896e-05, + "loss": 1.0403, + "step": 1190 }, { "epoch": 10.01, - "learning_rate": 4.558404558404559e-05, - "loss": 0.5062, - "step": 1260 + "learning_rate": 4.5545545545545545e-05, + "loss": 0.8273, + "step": 1200 }, { "epoch": 10.01, - "learning_rate": 4.550490661601773e-05, - "loss": 0.5214, - "step": 1270 + "learning_rate": 4.5462128795462135e-05, + "loss": 0.7807, + "step": 1210 }, { - "epoch": 10.01, - "learning_rate": 4.542576764798987e-05, - "loss": 0.6217, - "step": 1280 + "epoch": 10.02, + "learning_rate": 4.537871204537871e-05, + "loss": 0.9059, + "step": 1220 }, { "epoch": 10.02, - "learning_rate": 4.5346628679962014e-05, - "loss": 0.9957, - "step": 1290 + "learning_rate": 4.5295295295295295e-05, + "loss": 0.9499, + "step": 1230 }, { "epoch": 10.02, - "eval_accuracy": 0.6728110599078341, - "eval_loss": 0.9507045745849609, - "eval_runtime": 315.2197, - "eval_samples_per_second": 0.688, - "eval_steps_per_second": 0.174, - "step": 1298 + "eval_accuracy": 0.6036866359447005, + "eval_loss": 1.0620718002319336, + "eval_runtime": 255.1418, + "eval_samples_per_second": 0.851, + "eval_steps_per_second": 0.216, + "step": 1232 }, { "epoch": 11.0, - "learning_rate": 4.5267489711934157e-05, - "loss": 0.5816, - "step": 1300 - }, - { - "epoch": 11.0, - "learning_rate": 4.51883507439063e-05, - "loss": 0.8868, - "step": 1310 + "learning_rate": 4.5211878545211886e-05, + "loss": 1.0751, + "step": 1240 }, { "epoch": 11.0, - "learning_rate": 4.510921177587845e-05, - "loss": 0.7216, - "step": 1320 + "learning_rate": 4.512846179512846e-05, + "loss": 1.034, + "step": 1250 }, { "epoch": 11.0, - "learning_rate": 4.503007280785059e-05, - "loss": 0.5962, - "step": 1330 + "learning_rate": 4.5045045045045046e-05, + "loss": 0.6693, + "step": 1260 }, { "epoch": 11.01, - "learning_rate": 4.4950933839822725e-05, - "loss": 1.2384, - "step": 1340 + "learning_rate": 4.4961628294961636e-05, + "loss": 1.0748, + "step": 1270 }, { "epoch": 11.01, - "learning_rate": 4.4871794871794874e-05, - "loss": 0.8794, - "step": 1350 + "learning_rate": 4.487821154487821e-05, + "loss": 0.9789, + "step": 1280 }, { "epoch": 11.01, - "learning_rate": 4.479265590376702e-05, - "loss": 0.8364, - "step": 1360 + "learning_rate": 4.4794794794794796e-05, + "loss": 0.8083, + "step": 1290 }, { "epoch": 11.01, - "learning_rate": 4.471351693573916e-05, - "loss": 0.8106, - "step": 1370 + "learning_rate": 4.471137804471138e-05, + "loss": 0.9976, + "step": 1300 }, { "epoch": 11.01, - "learning_rate": 4.463437796771131e-05, - "loss": 0.7839, - "step": 1380 + "learning_rate": 4.462796129462796e-05, + "loss": 0.7674, + "step": 1310 }, { "epoch": 11.01, - "learning_rate": 4.455523899968344e-05, - "loss": 0.7032, - "step": 1390 + "learning_rate": 4.4544544544544546e-05, + "loss": 0.8705, + "step": 1320 }, { "epoch": 11.01, - "learning_rate": 4.4476100031655586e-05, - "loss": 0.9424, - "step": 1400 + "learning_rate": 4.446112779446113e-05, + "loss": 1.195, + "step": 1330 }, { "epoch": 11.02, - "learning_rate": 4.4396961063627735e-05, - "loss": 0.401, - "step": 1410 + "learning_rate": 4.437771104437771e-05, + "loss": 0.8805, + "step": 1340 }, { "epoch": 11.02, - "eval_accuracy": 0.7188940092165899, - "eval_loss": 0.6258705258369446, - "eval_runtime": 308.0396, - "eval_samples_per_second": 0.704, - "eval_steps_per_second": 0.179, - "step": 1416 - }, - { - "epoch": 12.0, - "learning_rate": 4.431782209559988e-05, - "loss": 0.4381, - "step": 1420 + "eval_accuracy": 0.7050691244239631, + "eval_loss": 0.871336042881012, + "eval_runtime": 256.7263, + "eval_samples_per_second": 0.845, + "eval_steps_per_second": 0.214, + "step": 1344 }, { "epoch": 12.0, - "learning_rate": 4.423868312757202e-05, - "loss": 0.5723, - "step": 1430 + "learning_rate": 4.42942942942943e-05, + "loss": 0.8859, + "step": 1350 }, { "epoch": 12.0, - "learning_rate": 4.415954415954416e-05, - "loss": 0.5368, - "step": 1440 + "learning_rate": 4.421087754421088e-05, + "loss": 1.2024, + "step": 1360 }, { "epoch": 12.0, - "learning_rate": 4.4080405191516304e-05, - "loss": 1.065, - "step": 1450 + "learning_rate": 4.4127460794127464e-05, + "loss": 0.9896, + "step": 1370 }, { "epoch": 12.01, - "learning_rate": 4.4001266223488446e-05, - "loss": 0.5411, - "step": 1460 + "learning_rate": 4.404404404404405e-05, + "loss": 0.9208, + "step": 1380 }, { "epoch": 12.01, - "learning_rate": 4.3922127255460595e-05, - "loss": 1.1746, - "step": 1470 + "learning_rate": 4.3960627293960624e-05, + "loss": 1.2375, + "step": 1390 }, { "epoch": 12.01, - "learning_rate": 4.384298828743274e-05, - "loss": 0.8506, - "step": 1480 + "learning_rate": 4.3877210543877214e-05, + "loss": 0.9493, + "step": 1400 }, { "epoch": 12.01, - "learning_rate": 4.376384931940488e-05, - "loss": 0.9014, - "step": 1490 + "learning_rate": 4.37937937937938e-05, + "loss": 0.9476, + "step": 1410 }, { "epoch": 12.01, - "learning_rate": 4.368471035137702e-05, - "loss": 0.7609, - "step": 1500 + "learning_rate": 4.3710377043710374e-05, + "loss": 0.8137, + "step": 1420 }, { "epoch": 12.01, - "learning_rate": 4.3605571383349164e-05, - "loss": 0.6263, - "step": 1510 + "learning_rate": 4.3626960293626964e-05, + "loss": 0.854, + "step": 1430 }, { "epoch": 12.01, - "learning_rate": 4.3526432415321306e-05, - "loss": 1.1741, - "step": 1520 + "learning_rate": 4.354354354354355e-05, + "loss": 1.0682, + "step": 1440 }, { "epoch": 12.02, - "learning_rate": 4.344729344729345e-05, - "loss": 0.5422, - "step": 1530 + "learning_rate": 4.3460126793460125e-05, + "loss": 1.2066, + "step": 1450 }, { "epoch": 12.02, - "eval_accuracy": 0.6774193548387096, - "eval_loss": 0.9453245997428894, - "eval_runtime": 323.3925, - "eval_samples_per_second": 0.671, - "eval_steps_per_second": 0.17, - "step": 1534 + "eval_accuracy": 0.5852534562211982, + "eval_loss": 0.9363781809806824, + "eval_runtime": 263.3253, + "eval_samples_per_second": 0.824, + "eval_steps_per_second": 0.209, + "step": 1456 }, { "epoch": 13.0, - "learning_rate": 4.336815447926559e-05, - "loss": 0.607, - "step": 1540 + "learning_rate": 4.3376710043376715e-05, + "loss": 0.7223, + "step": 1460 }, { "epoch": 13.0, - "learning_rate": 4.328901551123773e-05, - "loss": 0.7626, - "step": 1550 + "learning_rate": 4.32932932932933e-05, + "loss": 0.7549, + "step": 1470 }, { "epoch": 13.0, "learning_rate": 4.3209876543209875e-05, - "loss": 0.4373, - "step": 1560 + "loss": 0.9539, + "step": 1480 }, { "epoch": 13.01, - "learning_rate": 4.3130737575182024e-05, - "loss": 0.5643, - "step": 1570 + "learning_rate": 4.312645979312646e-05, + "loss": 0.8501, + "step": 1490 }, { "epoch": 13.01, - "learning_rate": 4.3051598607154166e-05, - "loss": 0.7982, - "step": 1580 + "learning_rate": 4.304304304304305e-05, + "loss": 0.9684, + "step": 1500 }, { "epoch": 13.01, - "learning_rate": 4.297245963912631e-05, - "loss": 0.971, - "step": 1590 + "learning_rate": 4.2959626292959625e-05, + "loss": 1.3551, + "step": 1510 }, { "epoch": 13.01, - "learning_rate": 4.289332067109845e-05, - "loss": 0.9764, - "step": 1600 + "learning_rate": 4.287620954287621e-05, + "loss": 0.9607, + "step": 1520 }, { "epoch": 13.01, - "learning_rate": 4.281418170307059e-05, - "loss": 0.8366, - "step": 1610 + "learning_rate": 4.27927927927928e-05, + "loss": 0.8907, + "step": 1530 }, { "epoch": 13.01, - "learning_rate": 4.2735042735042735e-05, - "loss": 1.0652, - "step": 1620 + "learning_rate": 4.2709376042709376e-05, + "loss": 1.2041, + "step": 1540 }, { "epoch": 13.01, - "learning_rate": 4.2655903767014884e-05, - "loss": 0.7619, - "step": 1630 - }, - { - "epoch": 13.02, - "learning_rate": 4.2576764798987026e-05, - "loss": 0.639, - "step": 1640 + "learning_rate": 4.262595929262596e-05, + "loss": 0.7261, + "step": 1550 }, { "epoch": 13.02, - "learning_rate": 4.249762583095916e-05, - "loss": 0.6852, - "step": 1650 + "learning_rate": 4.254254254254255e-05, + "loss": 0.9358, + "step": 1560 }, { "epoch": 13.02, - "eval_accuracy": 0.7004608294930875, - "eval_loss": 0.8649422526359558, - "eval_runtime": 301.7734, - "eval_samples_per_second": 0.719, - "eval_steps_per_second": 0.182, - "step": 1652 + "eval_accuracy": 0.5852534562211982, + "eval_loss": 0.910686731338501, + "eval_runtime": 265.3055, + "eval_samples_per_second": 0.818, + "eval_steps_per_second": 0.207, + "step": 1568 }, { "epoch": 14.0, - "learning_rate": 4.241848686293131e-05, - "loss": 0.9047, - "step": 1660 + "learning_rate": 4.2459125792459126e-05, + "loss": 0.9092, + "step": 1570 }, { "epoch": 14.0, - "learning_rate": 4.233934789490345e-05, - "loss": 1.1309, - "step": 1670 + "learning_rate": 4.237570904237571e-05, + "loss": 0.8376, + "step": 1580 }, { "epoch": 14.0, - "learning_rate": 4.2260208926875595e-05, - "loss": 0.7564, - "step": 1680 + "learning_rate": 4.229229229229229e-05, + "loss": 1.0121, + "step": 1590 }, { - "epoch": 14.01, - "learning_rate": 4.2181069958847744e-05, - "loss": 0.6608, - "step": 1690 + "epoch": 14.0, + "learning_rate": 4.2208875542208876e-05, + "loss": 0.9361, + "step": 1600 }, { "epoch": 14.01, - "learning_rate": 4.210193099081988e-05, - "loss": 0.5688, - "step": 1700 + "learning_rate": 4.212545879212546e-05, + "loss": 1.053, + "step": 1610 }, { "epoch": 14.01, - "learning_rate": 4.202279202279202e-05, - "loss": 0.6834, - "step": 1710 + "learning_rate": 4.204204204204204e-05, + "loss": 0.9652, + "step": 1620 }, { "epoch": 14.01, - "learning_rate": 4.194365305476417e-05, - "loss": 0.7658, - "step": 1720 + "learning_rate": 4.1958625291958627e-05, + "loss": 0.7609, + "step": 1630 }, { "epoch": 14.01, - "learning_rate": 4.186451408673631e-05, - "loss": 0.4818, - "step": 1730 + "learning_rate": 4.187520854187521e-05, + "loss": 1.0871, + "step": 1640 }, { "epoch": 14.01, - "learning_rate": 4.1785375118708455e-05, - "loss": 0.6077, - "step": 1740 + "learning_rate": 4.1791791791791793e-05, + "loss": 0.9353, + "step": 1650 }, { "epoch": 14.01, - "learning_rate": 4.17062361506806e-05, - "loss": 0.6641, - "step": 1750 + "learning_rate": 4.170837504170838e-05, + "loss": 0.7793, + "step": 1660 }, { "epoch": 14.02, - "learning_rate": 4.162709718265274e-05, - "loss": 0.4068, - "step": 1760 + "learning_rate": 4.162495829162496e-05, + "loss": 1.0073, + "step": 1670 }, { "epoch": 14.02, - "learning_rate": 4.154795821462488e-05, - "loss": 0.8469, - "step": 1770 + "learning_rate": 4.1541541541541544e-05, + "loss": 0.9043, + "step": 1680 }, { "epoch": 14.02, - "eval_accuracy": 0.6912442396313364, - "eval_loss": 0.9379397630691528, - "eval_runtime": 304.2958, - "eval_samples_per_second": 0.713, - "eval_steps_per_second": 0.181, - "step": 1770 + "eval_accuracy": 0.6359447004608295, + "eval_loss": 0.9146963953971863, + "eval_runtime": 259.9831, + "eval_samples_per_second": 0.835, + "eval_steps_per_second": 0.212, + "step": 1680 }, { "epoch": 15.0, - "learning_rate": 4.1468819246597024e-05, - "loss": 0.4658, - "step": 1780 + "learning_rate": 4.145812479145813e-05, + "loss": 1.0818, + "step": 1690 }, { "epoch": 15.0, - "learning_rate": 4.138968027856917e-05, - "loss": 0.4626, - "step": 1790 + "learning_rate": 4.137470804137471e-05, + "loss": 0.9315, + "step": 1700 }, { "epoch": 15.0, - "learning_rate": 4.131054131054131e-05, - "loss": 0.8248, - "step": 1800 - }, - { - "epoch": 15.01, - "learning_rate": 4.123140234251345e-05, - "loss": 0.883, - "step": 1810 + "learning_rate": 4.1291291291291294e-05, + "loss": 0.8777, + "step": 1710 }, { "epoch": 15.01, - "learning_rate": 4.11522633744856e-05, - "loss": 0.4465, - "step": 1820 + "learning_rate": 4.120787454120788e-05, + "loss": 1.0695, + "step": 1720 }, { "epoch": 15.01, - "learning_rate": 4.107312440645774e-05, - "loss": 0.696, - "step": 1830 + "learning_rate": 4.112445779112446e-05, + "loss": 0.6343, + "step": 1730 }, { "epoch": 15.01, - "learning_rate": 4.0993985438429884e-05, - "loss": 0.852, - "step": 1840 + "learning_rate": 4.1041041041041045e-05, + "loss": 1.0106, + "step": 1740 }, { "epoch": 15.01, - "learning_rate": 4.091484647040203e-05, - "loss": 0.7966, - "step": 1850 + "learning_rate": 4.095762429095763e-05, + "loss": 0.9499, + "step": 1750 }, { "epoch": 15.01, - "learning_rate": 4.083570750237417e-05, - "loss": 0.6836, - "step": 1860 + "learning_rate": 4.087420754087421e-05, + "loss": 0.9108, + "step": 1760 }, { "epoch": 15.01, - "learning_rate": 4.075656853434631e-05, - "loss": 0.8427, - "step": 1870 + "learning_rate": 4.0790790790790795e-05, + "loss": 0.8515, + "step": 1770 }, { "epoch": 15.02, - "learning_rate": 4.067742956631846e-05, - "loss": 0.8492, - "step": 1880 + "learning_rate": 4.070737404070737e-05, + "loss": 0.749, + "step": 1780 }, { "epoch": 15.02, - "eval_accuracy": 0.6451612903225806, - "eval_loss": 0.900291919708252, - "eval_runtime": 312.5208, - "eval_samples_per_second": 0.694, - "eval_steps_per_second": 0.176, - "step": 1888 + "learning_rate": 4.062395729062396e-05, + "loss": 0.8383, + "step": 1790 }, { - "epoch": 16.0, - "learning_rate": 4.05982905982906e-05, - "loss": 0.9333, - "step": 1890 + "epoch": 15.02, + "eval_accuracy": 0.6359447004608295, + "eval_loss": 0.945084810256958, + "eval_runtime": 259.6462, + "eval_samples_per_second": 0.836, + "eval_steps_per_second": 0.212, + "step": 1792 }, { "epoch": 16.0, - "learning_rate": 4.0519151630262745e-05, - "loss": 0.788, - "step": 1900 + "learning_rate": 4.0540540540540545e-05, + "loss": 0.8241, + "step": 1800 }, { "epoch": 16.0, - "learning_rate": 4.044001266223489e-05, - "loss": 0.6258, - "step": 1910 + "learning_rate": 4.045712379045712e-05, + "loss": 0.7361, + "step": 1810 }, { "epoch": 16.0, - "learning_rate": 4.036087369420703e-05, - "loss": 0.4548, - "step": 1920 + "learning_rate": 4.037370704037371e-05, + "loss": 0.978, + "step": 1820 }, { "epoch": 16.01, - "learning_rate": 4.028173472617917e-05, - "loss": 0.7268, - "step": 1930 + "learning_rate": 4.0290290290290296e-05, + "loss": 0.7197, + "step": 1830 }, { "epoch": 16.01, - "learning_rate": 4.020259575815132e-05, - "loss": 0.886, - "step": 1940 + "learning_rate": 4.020687354020687e-05, + "loss": 1.0322, + "step": 1840 }, { "epoch": 16.01, "learning_rate": 4.012345679012346e-05, - "loss": 0.6221, - "step": 1950 + "loss": 0.8089, + "step": 1850 }, { "epoch": 16.01, - "learning_rate": 4.00443178220956e-05, - "loss": 0.5802, - "step": 1960 + "learning_rate": 4.0040040040040046e-05, + "loss": 0.8855, + "step": 1860 }, { "epoch": 16.01, - "learning_rate": 3.996517885406775e-05, - "loss": 0.6743, - "step": 1970 + "learning_rate": 3.995662328995662e-05, + "loss": 0.9764, + "step": 1870 }, { "epoch": 16.01, - "learning_rate": 3.988603988603989e-05, - "loss": 0.6022, - "step": 1980 + "learning_rate": 3.9873206539873206e-05, + "loss": 0.7759, + "step": 1880 }, { "epoch": 16.01, - "learning_rate": 3.980690091801203e-05, - "loss": 0.6691, - "step": 1990 + "learning_rate": 3.9789789789789796e-05, + "loss": 0.5536, + "step": 1890 }, { "epoch": 16.02, - "learning_rate": 3.9727761949984174e-05, - "loss": 0.7633, - "step": 2000 + "learning_rate": 3.970637303970637e-05, + "loss": 0.7482, + "step": 1900 }, { "epoch": 16.02, - "eval_accuracy": 0.7235023041474654, - "eval_loss": 0.7601491212844849, - "eval_runtime": 299.492, - "eval_samples_per_second": 0.725, - "eval_steps_per_second": 0.184, - "step": 2006 - }, - { - "epoch": 17.0, - "learning_rate": 3.9648622981956316e-05, - "loss": 0.7329, - "step": 2010 + "eval_accuracy": 0.6221198156682027, + "eval_loss": 0.876456618309021, + "eval_runtime": 273.3147, + "eval_samples_per_second": 0.794, + "eval_steps_per_second": 0.201, + "step": 1904 }, { "epoch": 17.0, - "learning_rate": 3.956948401392846e-05, - "loss": 0.4407, - "step": 2020 + "learning_rate": 3.9622956289622956e-05, + "loss": 0.6662, + "step": 1910 }, { "epoch": 17.0, - "learning_rate": 3.94903450459006e-05, - "loss": 0.3618, - "step": 2030 + "learning_rate": 3.953953953953955e-05, + "loss": 0.857, + "step": 1920 }, { "epoch": 17.0, - "learning_rate": 3.941120607787275e-05, - "loss": 0.545, - "step": 2040 + "learning_rate": 3.945612278945612e-05, + "loss": 0.8507, + "step": 1930 }, { "epoch": 17.01, - "learning_rate": 3.933206710984489e-05, - "loss": 0.6322, - "step": 2050 + "learning_rate": 3.937270603937271e-05, + "loss": 1.1261, + "step": 1940 }, { "epoch": 17.01, - "learning_rate": 3.925292814181703e-05, - "loss": 0.7812, - "step": 2060 + "learning_rate": 3.92892892892893e-05, + "loss": 0.9851, + "step": 1950 }, { "epoch": 17.01, - "learning_rate": 3.9173789173789176e-05, - "loss": 0.5198, - "step": 2070 + "learning_rate": 3.9205872539205874e-05, + "loss": 0.5859, + "step": 1960 }, { "epoch": 17.01, - "learning_rate": 3.909465020576132e-05, - "loss": 0.563, - "step": 2080 + "learning_rate": 3.912245578912246e-05, + "loss": 0.9539, + "step": 1970 }, { "epoch": 17.01, - "learning_rate": 3.901551123773346e-05, - "loss": 0.7545, - "step": 2090 + "learning_rate": 3.903903903903904e-05, + "loss": 0.8663, + "step": 1980 }, { "epoch": 17.01, - "learning_rate": 3.893637226970561e-05, - "loss": 0.9906, - "step": 2100 + "learning_rate": 3.8955622288955624e-05, + "loss": 0.9946, + "step": 1990 }, { "epoch": 17.01, - "learning_rate": 3.8857233301677745e-05, - "loss": 0.807, - "step": 2110 + "learning_rate": 3.887220553887221e-05, + "loss": 0.835, + "step": 2000 }, { "epoch": 17.02, - "learning_rate": 3.877809433364989e-05, - "loss": 0.6063, - "step": 2120 + "learning_rate": 3.878878878878879e-05, + "loss": 0.9547, + "step": 2010 }, { "epoch": 17.02, - "eval_accuracy": 0.7788018433179723, - "eval_loss": 0.6181166768074036, - "eval_runtime": 296.2425, - "eval_samples_per_second": 0.733, - "eval_steps_per_second": 0.186, - "step": 2124 + "eval_accuracy": 0.7649769585253456, + "eval_loss": 0.7997832894325256, + "eval_runtime": 275.0933, + "eval_samples_per_second": 0.789, + "eval_steps_per_second": 0.2, + "step": 2016 }, { "epoch": 18.0, - "learning_rate": 3.8698955365622036e-05, - "loss": 0.8029, - "step": 2130 + "learning_rate": 3.8705372038705374e-05, + "loss": 0.8025, + "step": 2020 }, { "epoch": 18.0, - "learning_rate": 3.861981639759418e-05, - "loss": 0.7383, - "step": 2140 + "learning_rate": 3.862195528862196e-05, + "loss": 0.9637, + "step": 2030 }, { "epoch": 18.0, - "learning_rate": 3.854067742956632e-05, - "loss": 0.9192, - "step": 2150 + "learning_rate": 3.8538538538538534e-05, + "loss": 1.1231, + "step": 2040 }, { "epoch": 18.01, - "learning_rate": 3.846153846153846e-05, - "loss": 0.8748, - "step": 2160 + "learning_rate": 3.8455121788455125e-05, + "loss": 0.6693, + "step": 2050 }, { "epoch": 18.01, - "learning_rate": 3.8382399493510605e-05, - "loss": 0.5987, - "step": 2170 + "learning_rate": 3.837170503837171e-05, + "loss": 1.0535, + "step": 2060 }, { "epoch": 18.01, - "learning_rate": 3.830326052548275e-05, - "loss": 0.7523, - "step": 2180 + "learning_rate": 3.8288288288288285e-05, + "loss": 0.9, + "step": 2070 }, { "epoch": 18.01, - "learning_rate": 3.8224121557454896e-05, - "loss": 0.853, - "step": 2190 + "learning_rate": 3.8204871538204875e-05, + "loss": 0.7855, + "step": 2080 }, { "epoch": 18.01, - "learning_rate": 3.814498258942704e-05, - "loss": 0.7287, - "step": 2200 + "learning_rate": 3.812145478812146e-05, + "loss": 0.7349, + "step": 2090 }, { "epoch": 18.01, - "learning_rate": 3.806584362139918e-05, - "loss": 0.8661, - "step": 2210 + "learning_rate": 3.8038038038038035e-05, + "loss": 0.9599, + "step": 2100 }, { "epoch": 18.01, - "learning_rate": 3.798670465337132e-05, - "loss": 0.6155, - "step": 2220 + "learning_rate": 3.7954621287954625e-05, + "loss": 0.6589, + "step": 2110 }, { "epoch": 18.02, - "learning_rate": 3.7907565685343465e-05, - "loss": 0.792, - "step": 2230 + "learning_rate": 3.787120453787121e-05, + "loss": 0.7028, + "step": 2120 }, { "epoch": 18.02, - "learning_rate": 3.782842671731561e-05, - "loss": 0.6436, - "step": 2240 + "eval_accuracy": 0.6405529953917051, + "eval_loss": 0.9256826639175415, + "eval_runtime": 263.4393, + "eval_samples_per_second": 0.824, + "eval_steps_per_second": 0.209, + "step": 2128 }, { - "epoch": 18.02, - "eval_accuracy": 0.631336405529954, - "eval_loss": 0.9444882273674011, - "eval_runtime": 299.4212, - "eval_samples_per_second": 0.725, - "eval_steps_per_second": 0.184, - "step": 2242 + "epoch": 19.0, + "learning_rate": 3.7787787787787786e-05, + "loss": 0.5654, + "step": 2130 }, { "epoch": 19.0, - "learning_rate": 3.774928774928775e-05, - "loss": 0.6049, - "step": 2250 + "learning_rate": 3.7704371037704376e-05, + "loss": 0.8278, + "step": 2140 }, { "epoch": 19.0, - "learning_rate": 3.767014878125989e-05, - "loss": 0.6267, - "step": 2260 + "learning_rate": 3.762095428762096e-05, + "loss": 1.0629, + "step": 2150 }, { "epoch": 19.0, - "learning_rate": 3.7591009813232034e-05, - "loss": 0.4215, - "step": 2270 + "learning_rate": 3.7537537537537536e-05, + "loss": 1.026, + "step": 2160 }, { "epoch": 19.01, - "learning_rate": 3.7511870845204176e-05, - "loss": 0.9263, - "step": 2280 + "learning_rate": 3.745412078745412e-05, + "loss": 0.8544, + "step": 2170 }, { "epoch": 19.01, - "learning_rate": 3.7432731877176325e-05, - "loss": 0.4688, - "step": 2290 + "learning_rate": 3.737070403737071e-05, + "loss": 0.8669, + "step": 2180 }, { "epoch": 19.01, - "learning_rate": 3.735359290914847e-05, - "loss": 0.668, - "step": 2300 + "learning_rate": 3.7287287287287286e-05, + "loss": 0.6044, + "step": 2190 }, { "epoch": 19.01, - "learning_rate": 3.727445394112061e-05, - "loss": 0.5778, - "step": 2310 - }, - { - "epoch": 19.01, - "learning_rate": 3.719531497309275e-05, - "loss": 0.4659, - "step": 2320 + "learning_rate": 3.720387053720387e-05, + "loss": 0.8129, + "step": 2200 }, { "epoch": 19.01, - "learning_rate": 3.7116176005064894e-05, - "loss": 0.5316, - "step": 2330 + "learning_rate": 3.712045378712046e-05, + "loss": 0.5283, + "step": 2210 }, { "epoch": 19.01, "learning_rate": 3.7037037037037037e-05, - "loss": 0.6448, - "step": 2340 + "loss": 1.0115, + "step": 2220 }, { "epoch": 19.02, - "learning_rate": 3.6957898069009186e-05, - "loss": 0.5125, - "step": 2350 + "learning_rate": 3.695362028695362e-05, + "loss": 0.7743, + "step": 2230 }, { "epoch": 19.02, - "learning_rate": 3.687875910098133e-05, - "loss": 0.8931, - "step": 2360 + "learning_rate": 3.687020353687021e-05, + "loss": 0.8659, + "step": 2240 }, { "epoch": 19.02, - "eval_accuracy": 0.728110599078341, - "eval_loss": 0.8515065908432007, - "eval_runtime": 305.633, - "eval_samples_per_second": 0.71, - "eval_steps_per_second": 0.18, - "step": 2360 + "eval_accuracy": 0.5852534562211982, + "eval_loss": 1.0655064582824707, + "eval_runtime": 271.2554, + "eval_samples_per_second": 0.8, + "eval_steps_per_second": 0.203, + "step": 2240 }, { "epoch": 20.0, - "learning_rate": 3.679962013295346e-05, - "loss": 0.6621, - "step": 2370 + "learning_rate": 3.678678678678679e-05, + "loss": 0.8819, + "step": 2250 }, { "epoch": 20.0, - "learning_rate": 3.672048116492561e-05, - "loss": 0.5259, - "step": 2380 + "learning_rate": 3.670337003670337e-05, + "loss": 0.7077, + "step": 2260 }, { "epoch": 20.0, - "learning_rate": 3.6641342196897754e-05, - "loss": 0.3653, - "step": 2390 - }, - { - "epoch": 20.01, - "learning_rate": 3.65622032288699e-05, - "loss": 0.4086, - "step": 2400 + "learning_rate": 3.6619953286619954e-05, + "loss": 0.737, + "step": 2270 }, { "epoch": 20.01, - "learning_rate": 3.6483064260842046e-05, - "loss": 0.4137, - "step": 2410 + "learning_rate": 3.653653653653654e-05, + "loss": 0.8414, + "step": 2280 }, { "epoch": 20.01, - "learning_rate": 3.640392529281418e-05, - "loss": 1.0001, - "step": 2420 + "learning_rate": 3.645311978645312e-05, + "loss": 0.8566, + "step": 2290 }, { "epoch": 20.01, - "learning_rate": 3.6324786324786323e-05, - "loss": 0.7551, - "step": 2430 + "learning_rate": 3.6369703036369704e-05, + "loss": 0.8155, + "step": 2300 }, { "epoch": 20.01, - "learning_rate": 3.624564735675847e-05, - "loss": 0.7024, - "step": 2440 + "learning_rate": 3.628628628628629e-05, + "loss": 0.6651, + "step": 2310 }, { "epoch": 20.01, - "learning_rate": 3.6166508388730615e-05, - "loss": 0.6067, - "step": 2450 + "learning_rate": 3.620286953620287e-05, + "loss": 0.7734, + "step": 2320 }, { "epoch": 20.01, - "learning_rate": 3.608736942070276e-05, - "loss": 1.0071, - "step": 2460 + "learning_rate": 3.6119452786119454e-05, + "loss": 0.9056, + "step": 2330 }, { "epoch": 20.02, - "learning_rate": 3.60082304526749e-05, - "loss": 0.8599, - "step": 2470 + "learning_rate": 3.603603603603604e-05, + "loss": 0.9031, + "step": 2340 }, { "epoch": 20.02, - "eval_accuracy": 0.6359447004608295, - "eval_loss": 1.0786014795303345, - "eval_runtime": 314.2163, - "eval_samples_per_second": 0.691, - "eval_steps_per_second": 0.175, - "step": 2478 + "learning_rate": 3.595261928595262e-05, + "loss": 0.5591, + "step": 2350 }, { - "epoch": 21.0, - "learning_rate": 3.592909148464704e-05, - "loss": 0.803, - "step": 2480 + "epoch": 20.02, + "eval_accuracy": 0.576036866359447, + "eval_loss": 1.2793947458267212, + "eval_runtime": 265.9827, + "eval_samples_per_second": 0.816, + "eval_steps_per_second": 0.207, + "step": 2352 }, { "epoch": 21.0, - "learning_rate": 3.5849952516619184e-05, - "loss": 0.4828, - "step": 2490 + "learning_rate": 3.5869202535869205e-05, + "loss": 0.6663, + "step": 2360 }, { "epoch": 21.0, - "learning_rate": 3.5770813548591326e-05, - "loss": 0.3992, - "step": 2500 + "learning_rate": 3.578578578578579e-05, + "loss": 0.6847, + "step": 2370 }, { "epoch": 21.0, - "learning_rate": 3.5691674580563475e-05, - "loss": 0.62, - "step": 2510 + "learning_rate": 3.570236903570237e-05, + "loss": 0.8432, + "step": 2380 }, { "epoch": 21.01, - "learning_rate": 3.561253561253561e-05, - "loss": 0.7068, - "step": 2520 + "learning_rate": 3.5618952285618955e-05, + "loss": 0.6488, + "step": 2390 }, { "epoch": 21.01, - "learning_rate": 3.553339664450775e-05, - "loss": 0.3764, - "step": 2530 + "learning_rate": 3.553553553553554e-05, + "loss": 0.9092, + "step": 2400 }, { "epoch": 21.01, - "learning_rate": 3.54542576764799e-05, - "loss": 0.7479, - "step": 2540 + "learning_rate": 3.545211878545212e-05, + "loss": 0.8903, + "step": 2410 }, { "epoch": 21.01, - "learning_rate": 3.5375118708452044e-05, - "loss": 0.7272, - "step": 2550 + "learning_rate": 3.5368702035368706e-05, + "loss": 0.4662, + "step": 2420 }, { "epoch": 21.01, - "learning_rate": 3.5295979740424186e-05, - "loss": 0.653, - "step": 2560 + "learning_rate": 3.528528528528528e-05, + "loss": 0.9775, + "step": 2430 }, { "epoch": 21.01, - "learning_rate": 3.521684077239633e-05, - "loss": 0.5176, - "step": 2570 + "learning_rate": 3.520186853520187e-05, + "loss": 0.973, + "step": 2440 }, { "epoch": 21.01, - "learning_rate": 3.513770180436847e-05, - "loss": 0.7597, - "step": 2580 + "learning_rate": 3.5118451785118456e-05, + "loss": 0.5741, + "step": 2450 }, { "epoch": 21.02, - "learning_rate": 3.505856283634061e-05, - "loss": 0.5183, - "step": 2590 + "learning_rate": 3.503503503503503e-05, + "loss": 0.8963, + "step": 2460 }, { "epoch": 21.02, - "eval_accuracy": 0.6866359447004609, - "eval_loss": 0.948082447052002, - "eval_runtime": 300.3013, - "eval_samples_per_second": 0.723, - "eval_steps_per_second": 0.183, - "step": 2596 - }, - { - "epoch": 22.0, - "learning_rate": 3.497942386831276e-05, - "loss": 0.7119, - "step": 2600 + "eval_accuracy": 0.695852534562212, + "eval_loss": 1.0049302577972412, + "eval_runtime": 270.1661, + "eval_samples_per_second": 0.803, + "eval_steps_per_second": 0.204, + "step": 2464 }, { "epoch": 22.0, - "learning_rate": 3.4900284900284904e-05, - "loss": 0.6381, - "step": 2610 + "learning_rate": 3.495161828495162e-05, + "loss": 0.7073, + "step": 2470 }, { "epoch": 22.0, - "learning_rate": 3.4821145932257046e-05, - "loss": 0.8518, - "step": 2620 + "learning_rate": 3.4868201534868206e-05, + "loss": 0.5915, + "step": 2480 }, { "epoch": 22.0, - "learning_rate": 3.474200696422919e-05, - "loss": 0.9462, - "step": 2630 + "learning_rate": 3.478478478478478e-05, + "loss": 0.3567, + "step": 2490 }, { "epoch": 22.01, - "learning_rate": 3.466286799620133e-05, - "loss": 0.6183, - "step": 2640 + "learning_rate": 3.470136803470137e-05, + "loss": 0.7007, + "step": 2500 }, { "epoch": 22.01, - "learning_rate": 3.458372902817347e-05, - "loss": 0.7339, - "step": 2650 + "learning_rate": 3.4617951284617957e-05, + "loss": 0.4253, + "step": 2510 }, { "epoch": 22.01, - "learning_rate": 3.450459006014562e-05, - "loss": 0.8466, - "step": 2660 + "learning_rate": 3.453453453453453e-05, + "loss": 0.9297, + "step": 2520 }, { "epoch": 22.01, - "learning_rate": 3.4425451092117764e-05, - "loss": 0.6976, - "step": 2670 + "learning_rate": 3.4451117784451123e-05, + "loss": 0.9365, + "step": 2530 }, { "epoch": 22.01, - "learning_rate": 3.43463121240899e-05, - "loss": 0.9784, - "step": 2680 + "learning_rate": 3.436770103436771e-05, + "loss": 0.4607, + "step": 2540 }, { "epoch": 22.01, - "learning_rate": 3.426717315606205e-05, - "loss": 0.4932, - "step": 2690 + "learning_rate": 3.4284284284284284e-05, + "loss": 0.8208, + "step": 2550 }, { "epoch": 22.01, - "learning_rate": 3.418803418803419e-05, - "loss": 0.5164, - "step": 2700 + "learning_rate": 3.420086753420087e-05, + "loss": 0.8313, + "step": 2560 }, { "epoch": 22.02, - "learning_rate": 3.410889522000633e-05, - "loss": 0.7982, - "step": 2710 + "learning_rate": 3.411745078411746e-05, + "loss": 0.9221, + "step": 2570 }, { "epoch": 22.02, - "eval_accuracy": 0.7235023041474654, - "eval_loss": 0.8364368677139282, - "eval_runtime": 306.1906, - "eval_samples_per_second": 0.709, - "eval_steps_per_second": 0.18, - "step": 2714 + "eval_accuracy": 0.6082949308755761, + "eval_loss": 1.111289620399475, + "eval_runtime": 272.9644, + "eval_samples_per_second": 0.795, + "eval_steps_per_second": 0.201, + "step": 2576 }, { "epoch": 23.0, - "learning_rate": 3.4029756251978475e-05, - "loss": 0.6178, - "step": 2720 + "learning_rate": 3.4034034034034034e-05, + "loss": 0.8386, + "step": 2580 }, { "epoch": 23.0, "learning_rate": 3.395061728395062e-05, - "loss": 0.5603, - "step": 2730 + "loss": 0.549, + "step": 2590 }, { "epoch": 23.0, - "learning_rate": 3.387147831592276e-05, - "loss": 0.6577, - "step": 2740 + "learning_rate": 3.386720053386721e-05, + "loss": 1.0528, + "step": 2600 }, { "epoch": 23.01, - "learning_rate": 3.37923393478949e-05, - "loss": 0.7942, - "step": 2750 + "learning_rate": 3.3783783783783784e-05, + "loss": 0.6398, + "step": 2610 }, { "epoch": 23.01, - "learning_rate": 3.371320037986705e-05, - "loss": 0.4468, - "step": 2760 + "learning_rate": 3.370036703370037e-05, + "loss": 0.7276, + "step": 2620 }, { "epoch": 23.01, - "learning_rate": 3.363406141183919e-05, - "loss": 0.6931, - "step": 2770 + "learning_rate": 3.361695028361695e-05, + "loss": 0.8483, + "step": 2630 }, { "epoch": 23.01, - "learning_rate": 3.355492244381133e-05, - "loss": 0.5835, - "step": 2780 + "learning_rate": 3.3533533533533535e-05, + "loss": 0.5737, + "step": 2640 }, { "epoch": 23.01, - "learning_rate": 3.347578347578348e-05, - "loss": 0.7099, - "step": 2790 + "learning_rate": 3.345011678345012e-05, + "loss": 0.588, + "step": 2650 }, { "epoch": 23.01, - "learning_rate": 3.339664450775562e-05, - "loss": 0.6785, - "step": 2800 + "learning_rate": 3.33667000333667e-05, + "loss": 0.6416, + "step": 2660 }, { "epoch": 23.01, - "learning_rate": 3.331750553972776e-05, - "loss": 0.3025, - "step": 2810 - }, - { - "epoch": 23.02, - "learning_rate": 3.323836657169991e-05, - "loss": 0.9246, - "step": 2820 + "learning_rate": 3.3283283283283285e-05, + "loss": 0.7938, + "step": 2670 }, { "epoch": 23.02, - "learning_rate": 3.3159227603672046e-05, - "loss": 1.0003, - "step": 2830 + "learning_rate": 3.319986653319987e-05, + "loss": 0.7154, + "step": 2680 }, { "epoch": 23.02, - "eval_accuracy": 0.7327188940092166, - "eval_loss": 0.7810962796211243, - "eval_runtime": 298.2745, - "eval_samples_per_second": 0.728, - "eval_steps_per_second": 0.184, - "step": 2832 + "eval_accuracy": 0.6405529953917051, + "eval_loss": 0.9371297359466553, + "eval_runtime": 283.5798, + "eval_samples_per_second": 0.765, + "eval_steps_per_second": 0.194, + "step": 2688 }, { "epoch": 24.0, - "learning_rate": 3.308008863564419e-05, - "loss": 1.1583, - "step": 2840 + "learning_rate": 3.311644978311645e-05, + "loss": 0.4492, + "step": 2690 }, { "epoch": 24.0, - "learning_rate": 3.300094966761634e-05, - "loss": 0.4923, - "step": 2850 + "learning_rate": 3.3033033033033035e-05, + "loss": 0.6304, + "step": 2700 }, { "epoch": 24.0, - "learning_rate": 3.292181069958848e-05, - "loss": 0.4868, - "step": 2860 + "learning_rate": 3.294961628294962e-05, + "loss": 0.7286, + "step": 2710 }, { - "epoch": 24.01, - "learning_rate": 3.284267173156062e-05, - "loss": 0.7352, - "step": 2870 + "epoch": 24.0, + "learning_rate": 3.2866199532866195e-05, + "loss": 0.9193, + "step": 2720 }, { "epoch": 24.01, - "learning_rate": 3.2763532763532764e-05, - "loss": 0.8188, - "step": 2880 + "learning_rate": 3.2782782782782786e-05, + "loss": 0.7511, + "step": 2730 }, { "epoch": 24.01, - "learning_rate": 3.268439379550491e-05, - "loss": 0.8439, - "step": 2890 + "learning_rate": 3.269936603269937e-05, + "loss": 0.7076, + "step": 2740 }, { "epoch": 24.01, - "learning_rate": 3.260525482747705e-05, - "loss": 0.821, - "step": 2900 + "learning_rate": 3.2615949282615946e-05, + "loss": 0.7735, + "step": 2750 }, { "epoch": 24.01, - "learning_rate": 3.25261158594492e-05, - "loss": 0.6092, - "step": 2910 + "learning_rate": 3.2532532532532536e-05, + "loss": 0.9404, + "step": 2760 }, { "epoch": 24.01, - "learning_rate": 3.244697689142134e-05, - "loss": 0.7416, - "step": 2920 + "learning_rate": 3.244911578244912e-05, + "loss": 0.9553, + "step": 2770 }, { "epoch": 24.01, - "learning_rate": 3.236783792339348e-05, - "loss": 0.4177, - "step": 2930 + "learning_rate": 3.2365699032365696e-05, + "loss": 1.0067, + "step": 2780 }, { "epoch": 24.02, - "learning_rate": 3.2288698955365625e-05, - "loss": 0.5732, - "step": 2940 + "learning_rate": 3.2282282282282286e-05, + "loss": 0.7712, + "step": 2790 }, { "epoch": 24.02, - "learning_rate": 3.220955998733777e-05, - "loss": 0.6666, - "step": 2950 + "learning_rate": 3.219886553219887e-05, + "loss": 0.8795, + "step": 2800 }, { "epoch": 24.02, - "eval_accuracy": 0.7465437788018433, - "eval_loss": 0.7551702857017517, - "eval_runtime": 297.83, - "eval_samples_per_second": 0.729, - "eval_steps_per_second": 0.185, - "step": 2950 + "eval_accuracy": 0.7235023041474654, + "eval_loss": 0.6837921738624573, + "eval_runtime": 264.0243, + "eval_samples_per_second": 0.822, + "eval_steps_per_second": 0.208, + "step": 2800 }, { "epoch": 25.0, - "learning_rate": 3.213042101930991e-05, - "loss": 0.5516, - "step": 2960 + "learning_rate": 3.2115448782115447e-05, + "loss": 0.6295, + "step": 2810 }, { "epoch": 25.0, - "learning_rate": 3.205128205128206e-05, - "loss": 0.5643, - "step": 2970 + "learning_rate": 3.203203203203203e-05, + "loss": 0.5106, + "step": 2820 }, { "epoch": 25.0, - "learning_rate": 3.1972143083254193e-05, - "loss": 0.7, - "step": 2980 - }, - { - "epoch": 25.01, - "learning_rate": 3.1893004115226336e-05, - "loss": 0.4949, - "step": 2990 + "learning_rate": 3.194861528194862e-05, + "loss": 0.6515, + "step": 2830 }, { "epoch": 25.01, - "learning_rate": 3.181386514719848e-05, - "loss": 0.6588, - "step": 3000 + "learning_rate": 3.18651985318652e-05, + "loss": 0.5182, + "step": 2840 }, { "epoch": 25.01, - "learning_rate": 3.173472617917063e-05, - "loss": 0.6374, - "step": 3010 + "learning_rate": 3.178178178178178e-05, + "loss": 0.9393, + "step": 2850 }, { "epoch": 25.01, - "learning_rate": 3.165558721114277e-05, - "loss": 0.4516, - "step": 3020 + "learning_rate": 3.169836503169837e-05, + "loss": 0.6204, + "step": 2860 }, { "epoch": 25.01, - "learning_rate": 3.157644824311491e-05, - "loss": 0.6673, - "step": 3030 + "learning_rate": 3.161494828161495e-05, + "loss": 0.5407, + "step": 2870 }, { "epoch": 25.01, - "learning_rate": 3.1497309275087054e-05, - "loss": 0.6344, - "step": 3040 + "learning_rate": 3.153153153153153e-05, + "loss": 0.8556, + "step": 2880 }, { "epoch": 25.01, - "learning_rate": 3.1418170307059196e-05, - "loss": 0.422, - "step": 3050 + "learning_rate": 3.144811478144812e-05, + "loss": 0.6423, + "step": 2890 }, { "epoch": 25.02, - "learning_rate": 3.133903133903134e-05, - "loss": 0.8527, - "step": 3060 + "learning_rate": 3.13646980313647e-05, + "loss": 0.8295, + "step": 2900 }, { "epoch": 25.02, - "eval_accuracy": 0.7188940092165899, - "eval_loss": 0.8201001286506653, - "eval_runtime": 289.214, - "eval_samples_per_second": 0.75, - "eval_steps_per_second": 0.19, - "step": 3068 + "learning_rate": 3.128128128128128e-05, + "loss": 0.631, + "step": 2910 }, { - "epoch": 26.0, - "learning_rate": 3.125989237100349e-05, - "loss": 0.71, - "step": 3070 + "epoch": 25.02, + "eval_accuracy": 0.6129032258064516, + "eval_loss": 1.209276556968689, + "eval_runtime": 285.1576, + "eval_samples_per_second": 0.761, + "eval_steps_per_second": 0.193, + "step": 2912 }, { "epoch": 26.0, - "learning_rate": 3.118075340297563e-05, - "loss": 0.7812, - "step": 3080 + "learning_rate": 3.119786453119787e-05, + "loss": 0.7184, + "step": 2920 }, { "epoch": 26.0, - "learning_rate": 3.1101614434947765e-05, - "loss": 0.4559, - "step": 3090 + "learning_rate": 3.111444778111445e-05, + "loss": 0.6305, + "step": 2930 }, { "epoch": 26.0, - "learning_rate": 3.1022475466919914e-05, - "loss": 0.5269, - "step": 3100 + "learning_rate": 3.103103103103103e-05, + "loss": 0.7949, + "step": 2940 }, { "epoch": 26.01, - "learning_rate": 3.0943336498892056e-05, - "loss": 0.75, - "step": 3110 + "learning_rate": 3.0947614280947615e-05, + "loss": 1.2476, + "step": 2950 }, { "epoch": 26.01, "learning_rate": 3.08641975308642e-05, - "loss": 0.7118, - "step": 3120 + "loss": 0.7922, + "step": 2960 }, { "epoch": 26.01, - "learning_rate": 3.078505856283635e-05, - "loss": 0.7999, - "step": 3130 + "learning_rate": 3.078078078078078e-05, + "loss": 0.581, + "step": 2970 }, { "epoch": 26.01, - "learning_rate": 3.070591959480848e-05, - "loss": 0.4535, - "step": 3140 + "learning_rate": 3.0697364030697365e-05, + "loss": 0.6329, + "step": 2980 }, { "epoch": 26.01, - "learning_rate": 3.0626780626780625e-05, - "loss": 0.7403, - "step": 3150 + "learning_rate": 3.061394728061395e-05, + "loss": 0.8019, + "step": 2990 }, { "epoch": 26.01, - "learning_rate": 3.0547641658752774e-05, - "loss": 0.7067, - "step": 3160 + "learning_rate": 3.053053053053053e-05, + "loss": 0.7641, + "step": 3000 }, { "epoch": 26.01, - "learning_rate": 3.0468502690724916e-05, + "learning_rate": 3.044711378044712e-05, "loss": 0.6169, - "step": 3170 + "step": 3010 }, { "epoch": 26.02, - "learning_rate": 3.0389363722697055e-05, - "loss": 0.4678, - "step": 3180 + "learning_rate": 3.0363697030363696e-05, + "loss": 1.0489, + "step": 3020 }, { "epoch": 26.02, - "eval_accuracy": 0.695852534562212, - "eval_loss": 1.0259956121444702, - "eval_runtime": 293.3586, - "eval_samples_per_second": 0.74, - "eval_steps_per_second": 0.187, - "step": 3186 - }, - { - "epoch": 27.0, - "learning_rate": 3.0310224754669204e-05, - "loss": 0.4734, - "step": 3190 + "eval_accuracy": 0.5483870967741935, + "eval_loss": 1.472022294998169, + "eval_runtime": 262.7206, + "eval_samples_per_second": 0.826, + "eval_steps_per_second": 0.209, + "step": 3024 }, { "epoch": 27.0, - "learning_rate": 3.0231085786641343e-05, - "loss": 0.6945, - "step": 3200 + "learning_rate": 3.0280280280280282e-05, + "loss": 0.5064, + "step": 3030 }, { "epoch": 27.0, - "learning_rate": 3.0151946818613485e-05, - "loss": 0.667, - "step": 3210 + "learning_rate": 3.0196863530196866e-05, + "loss": 0.493, + "step": 3040 }, { "epoch": 27.0, - "learning_rate": 3.0072807850585634e-05, - "loss": 0.6138, - "step": 3220 + "learning_rate": 3.0113446780113446e-05, + "loss": 0.6668, + "step": 3050 }, { "epoch": 27.01, - "learning_rate": 2.9993668882557773e-05, - "loss": 0.7627, - "step": 3230 + "learning_rate": 3.0030030030030033e-05, + "loss": 0.5836, + "step": 3060 }, { "epoch": 27.01, - "learning_rate": 2.9914529914529915e-05, - "loss": 0.8703, - "step": 3240 + "learning_rate": 2.9946613279946616e-05, + "loss": 0.997, + "step": 3070 }, { "epoch": 27.01, - "learning_rate": 2.9835390946502057e-05, - "loss": 0.6306, - "step": 3250 + "learning_rate": 2.9863196529863196e-05, + "loss": 0.8393, + "step": 3080 }, { "epoch": 27.01, - "learning_rate": 2.9756251978474203e-05, - "loss": 0.7293, - "step": 3260 + "learning_rate": 2.9779779779779783e-05, + "loss": 0.609, + "step": 3090 }, { "epoch": 27.01, - "learning_rate": 2.9677113010446345e-05, - "loss": 0.5378, - "step": 3270 + "learning_rate": 2.9696363029696367e-05, + "loss": 0.7111, + "step": 3100 }, { "epoch": 27.01, - "learning_rate": 2.9597974042418487e-05, - "loss": 0.9667, - "step": 3280 + "learning_rate": 2.9612946279612947e-05, + "loss": 0.7491, + "step": 3110 }, { "epoch": 27.01, - "learning_rate": 2.9518835074390633e-05, - "loss": 0.4905, - "step": 3290 + "learning_rate": 2.952952952952953e-05, + "loss": 0.69, + "step": 3120 }, { "epoch": 27.02, - "learning_rate": 2.9439696106362775e-05, - "loss": 0.7354, - "step": 3300 + "learning_rate": 2.9446112779446117e-05, + "loss": 0.5881, + "step": 3130 }, { "epoch": 27.02, - "eval_accuracy": 0.6866359447004609, - "eval_loss": 0.8520135879516602, - "eval_runtime": 253.7054, - "eval_samples_per_second": 0.855, - "eval_steps_per_second": 0.217, - "step": 3304 + "eval_accuracy": 0.631336405529954, + "eval_loss": 1.1905186176300049, + "eval_runtime": 281.4297, + "eval_samples_per_second": 0.771, + "eval_steps_per_second": 0.195, + "step": 3136 }, { "epoch": 28.0, - "learning_rate": 2.9360557138334914e-05, - "loss": 0.4119, - "step": 3310 + "learning_rate": 2.9362696029362697e-05, + "loss": 1.0567, + "step": 3140 }, { "epoch": 28.0, - "learning_rate": 2.9281418170307063e-05, - "loss": 0.5532, - "step": 3320 + "learning_rate": 2.927927927927928e-05, + "loss": 0.5978, + "step": 3150 }, { "epoch": 28.0, - "learning_rate": 2.9202279202279202e-05, - "loss": 0.5374, - "step": 3330 + "learning_rate": 2.9195862529195867e-05, + "loss": 0.5434, + "step": 3160 }, { "epoch": 28.01, - "learning_rate": 2.9123140234251344e-05, - "loss": 0.4628, - "step": 3340 + "learning_rate": 2.9112445779112447e-05, + "loss": 0.6441, + "step": 3170 }, { "epoch": 28.01, - "learning_rate": 2.9044001266223493e-05, - "loss": 0.6509, - "step": 3350 + "learning_rate": 2.902902902902903e-05, + "loss": 0.8564, + "step": 3180 }, { "epoch": 28.01, - "learning_rate": 2.8964862298195632e-05, - "loss": 0.3157, - "step": 3360 + "learning_rate": 2.8945612278945618e-05, + "loss": 0.7492, + "step": 3190 }, { "epoch": 28.01, - "learning_rate": 2.8885723330167774e-05, - "loss": 0.5705, - "step": 3370 + "learning_rate": 2.8862195528862194e-05, + "loss": 0.7439, + "step": 3200 }, { "epoch": 28.01, - "learning_rate": 2.880658436213992e-05, - "loss": 0.5273, - "step": 3380 + "learning_rate": 2.877877877877878e-05, + "loss": 0.731, + "step": 3210 }, { "epoch": 28.01, - "learning_rate": 2.8727445394112062e-05, - "loss": 0.4847, - "step": 3390 + "learning_rate": 2.8695362028695365e-05, + "loss": 0.8052, + "step": 3220 }, { "epoch": 28.01, - "learning_rate": 2.8648306426084204e-05, - "loss": 0.5194, - "step": 3400 - }, - { - "epoch": 28.02, - "learning_rate": 2.856916745805635e-05, - "loss": 0.391, - "step": 3410 + "learning_rate": 2.8611945278611945e-05, + "loss": 0.8463, + "step": 3230 }, { "epoch": 28.02, - "learning_rate": 2.8490028490028492e-05, - "loss": 1.1097, - "step": 3420 + "learning_rate": 2.852852852852853e-05, + "loss": 0.7919, + "step": 3240 }, { "epoch": 28.02, - "eval_accuracy": 0.7327188940092166, - "eval_loss": 0.9238936901092529, - "eval_runtime": 306.5306, - "eval_samples_per_second": 0.708, - "eval_steps_per_second": 0.179, - "step": 3422 + "eval_accuracy": 0.576036866359447, + "eval_loss": 1.1292119026184082, + "eval_runtime": 300.8957, + "eval_samples_per_second": 0.721, + "eval_steps_per_second": 0.183, + "step": 3248 }, { "epoch": 29.0, - "learning_rate": 2.8410889522000634e-05, - "loss": 0.7316, - "step": 3430 + "learning_rate": 2.844511177844511e-05, + "loss": 0.9881, + "step": 3250 }, { "epoch": 29.0, - "learning_rate": 2.833175055397278e-05, - "loss": 0.5964, - "step": 3440 + "learning_rate": 2.8361695028361695e-05, + "loss": 0.4171, + "step": 3260 }, { "epoch": 29.0, - "learning_rate": 2.8252611585944922e-05, - "loss": 0.4294, - "step": 3450 + "learning_rate": 2.8278278278278282e-05, + "loss": 0.4612, + "step": 3270 }, { - "epoch": 29.01, - "learning_rate": 2.817347261791706e-05, - "loss": 0.5873, - "step": 3460 + "epoch": 29.0, + "learning_rate": 2.8194861528194862e-05, + "loss": 0.6761, + "step": 3280 }, { "epoch": 29.01, - "learning_rate": 2.8094333649889203e-05, - "loss": 0.5362, - "step": 3470 + "learning_rate": 2.8111444778111445e-05, + "loss": 0.4762, + "step": 3290 }, { "epoch": 29.01, - "learning_rate": 2.8015194681861352e-05, - "loss": 0.7654, - "step": 3480 + "learning_rate": 2.8028028028028032e-05, + "loss": 0.6633, + "step": 3300 }, { "epoch": 29.01, - "learning_rate": 2.793605571383349e-05, - "loss": 0.6749, - "step": 3490 + "learning_rate": 2.794461127794461e-05, + "loss": 0.5644, + "step": 3310 }, { "epoch": 29.01, - "learning_rate": 2.7856916745805633e-05, - "loss": 0.392, - "step": 3500 + "learning_rate": 2.7861194527861196e-05, + "loss": 0.9361, + "step": 3320 }, { "epoch": 29.01, "learning_rate": 2.777777777777778e-05, - "loss": 0.9069, - "step": 3510 + "loss": 0.8429, + "step": 3330 }, { "epoch": 29.01, - "learning_rate": 2.769863880974992e-05, - "loss": 0.4994, - "step": 3520 + "learning_rate": 2.769436102769436e-05, + "loss": 0.6224, + "step": 3340 }, { "epoch": 29.02, - "learning_rate": 2.7619499841722064e-05, - "loss": 0.7977, - "step": 3530 + "learning_rate": 2.7610944277610946e-05, + "loss": 0.8489, + "step": 3350 }, { "epoch": 29.02, - "learning_rate": 2.754036087369421e-05, - "loss": 0.6264, - "step": 3540 + "learning_rate": 2.752752752752753e-05, + "loss": 0.9158, + "step": 3360 }, { "epoch": 29.02, - "eval_accuracy": 0.7557603686635944, - "eval_loss": 0.6894146800041199, - "eval_runtime": 290.6314, - "eval_samples_per_second": 0.747, - "eval_steps_per_second": 0.189, - "step": 3540 + "eval_accuracy": 0.6359447004608295, + "eval_loss": 1.0214248895645142, + "eval_runtime": 281.3806, + "eval_samples_per_second": 0.771, + "eval_steps_per_second": 0.195, + "step": 3360 }, { "epoch": 30.0, - "learning_rate": 2.746122190566635e-05, - "loss": 1.0999, - "step": 3550 + "learning_rate": 2.744411077744411e-05, + "loss": 0.6866, + "step": 3370 }, { "epoch": 30.0, - "learning_rate": 2.7382082937638494e-05, - "loss": 0.5372, - "step": 3560 + "learning_rate": 2.7360694027360696e-05, + "loss": 0.8052, + "step": 3380 }, { "epoch": 30.0, - "learning_rate": 2.730294396961064e-05, - "loss": 0.595, - "step": 3570 - }, - { - "epoch": 30.01, - "learning_rate": 2.722380500158278e-05, - "loss": 0.9679, - "step": 3580 + "learning_rate": 2.727727727727728e-05, + "loss": 0.6658, + "step": 3390 }, { "epoch": 30.01, - "learning_rate": 2.714466603355492e-05, - "loss": 0.9635, - "step": 3590 + "learning_rate": 2.719386052719386e-05, + "loss": 0.6167, + "step": 3400 }, { "epoch": 30.01, - "learning_rate": 2.706552706552707e-05, - "loss": 0.3871, - "step": 3600 + "learning_rate": 2.7110443777110443e-05, + "loss": 0.5109, + "step": 3410 }, { "epoch": 30.01, - "learning_rate": 2.698638809749921e-05, - "loss": 0.4446, - "step": 3610 + "learning_rate": 2.702702702702703e-05, + "loss": 1.1015, + "step": 3420 }, { "epoch": 30.01, - "learning_rate": 2.690724912947135e-05, - "loss": 0.587, - "step": 3620 + "learning_rate": 2.694361027694361e-05, + "loss": 0.7845, + "step": 3430 }, { "epoch": 30.01, - "learning_rate": 2.68281101614435e-05, - "loss": 0.5359, - "step": 3630 + "learning_rate": 2.6860193526860194e-05, + "loss": 0.6872, + "step": 3440 }, { "epoch": 30.01, - "learning_rate": 2.6748971193415638e-05, - "loss": 0.7691, - "step": 3640 + "learning_rate": 2.677677677677678e-05, + "loss": 0.4606, + "step": 3450 }, { "epoch": 30.02, - "learning_rate": 2.666983222538778e-05, - "loss": 0.3348, - "step": 3650 + "learning_rate": 2.669336002669336e-05, + "loss": 0.7213, + "step": 3460 }, { "epoch": 30.02, - "eval_accuracy": 0.8064516129032258, - "eval_loss": 0.6229776740074158, - "eval_runtime": 293.7254, - "eval_samples_per_second": 0.739, - "eval_steps_per_second": 0.187, - "step": 3658 + "learning_rate": 2.6609943276609944e-05, + "loss": 0.8319, + "step": 3470 }, { - "epoch": 31.0, - "learning_rate": 2.6590693257359926e-05, - "loss": 0.9398, - "step": 3660 + "epoch": 30.02, + "eval_accuracy": 0.6682027649769585, + "eval_loss": 1.2861884832382202, + "eval_runtime": 276.1791, + "eval_samples_per_second": 0.786, + "eval_steps_per_second": 0.199, + "step": 3472 }, { "epoch": 31.0, - "learning_rate": 2.651155428933207e-05, - "loss": 0.6431, - "step": 3670 + "learning_rate": 2.652652652652653e-05, + "loss": 0.7171, + "step": 3480 }, { "epoch": 31.0, - "learning_rate": 2.643241532130421e-05, - "loss": 0.3879, - "step": 3680 + "learning_rate": 2.6443109776443108e-05, + "loss": 1.2605, + "step": 3490 }, { "epoch": 31.0, - "learning_rate": 2.6353276353276356e-05, - "loss": 0.4384, - "step": 3690 + "learning_rate": 2.6359693026359694e-05, + "loss": 0.7113, + "step": 3500 }, { "epoch": 31.01, - "learning_rate": 2.62741373852485e-05, - "loss": 0.6234, - "step": 3700 + "learning_rate": 2.6276276276276278e-05, + "loss": 0.4538, + "step": 3510 }, { "epoch": 31.01, - "learning_rate": 2.619499841722064e-05, - "loss": 0.7331, - "step": 3710 + "learning_rate": 2.6192859526192858e-05, + "loss": 0.5572, + "step": 3520 }, { "epoch": 31.01, - "learning_rate": 2.611585944919278e-05, - "loss": 0.3598, - "step": 3720 + "learning_rate": 2.6109442776109445e-05, + "loss": 0.7715, + "step": 3530 }, { "epoch": 31.01, - "learning_rate": 2.603672048116493e-05, - "loss": 0.6426, - "step": 3730 + "learning_rate": 2.6026026026026028e-05, + "loss": 1.0429, + "step": 3540 }, { "epoch": 31.01, - "learning_rate": 2.595758151313707e-05, - "loss": 0.5889, - "step": 3740 + "learning_rate": 2.5942609275942608e-05, + "loss": 0.7335, + "step": 3550 }, { "epoch": 31.01, - "learning_rate": 2.587844254510921e-05, - "loss": 0.6661, - "step": 3750 + "learning_rate": 2.5859192525859195e-05, + "loss": 0.7139, + "step": 3560 }, { "epoch": 31.01, - "learning_rate": 2.579930357708136e-05, - "loss": 0.672, - "step": 3760 + "learning_rate": 2.577577577577578e-05, + "loss": 0.422, + "step": 3570 }, { "epoch": 31.02, - "learning_rate": 2.5720164609053497e-05, - "loss": 0.5548, - "step": 3770 + "learning_rate": 2.569235902569236e-05, + "loss": 0.6775, + "step": 3580 }, { "epoch": 31.02, - "eval_accuracy": 0.8202764976958525, - "eval_loss": 0.6430536508560181, - "eval_runtime": 296.7673, - "eval_samples_per_second": 0.731, - "eval_steps_per_second": 0.185, - "step": 3776 - }, - { - "epoch": 32.0, - "learning_rate": 2.564102564102564e-05, - "loss": 0.6612, - "step": 3780 + "eval_accuracy": 0.6405529953917051, + "eval_loss": 1.0971248149871826, + "eval_runtime": 295.308, + "eval_samples_per_second": 0.735, + "eval_steps_per_second": 0.186, + "step": 3584 }, { "epoch": 32.0, - "learning_rate": 2.5561886672997785e-05, - "loss": 0.7013, - "step": 3790 + "learning_rate": 2.5608942275608942e-05, + "loss": 0.3834, + "step": 3590 }, { "epoch": 32.0, - "learning_rate": 2.5482747704969927e-05, - "loss": 0.7166, - "step": 3800 + "learning_rate": 2.552552552552553e-05, + "loss": 0.5938, + "step": 3600 }, { "epoch": 32.0, - "learning_rate": 2.540360873694207e-05, - "loss": 0.8458, - "step": 3810 + "learning_rate": 2.544210877544211e-05, + "loss": 0.6666, + "step": 3610 }, { "epoch": 32.01, - "learning_rate": 2.5324469768914215e-05, - "loss": 0.7025, - "step": 3820 + "learning_rate": 2.5358692025358692e-05, + "loss": 0.7156, + "step": 3620 }, { "epoch": 32.01, - "learning_rate": 2.5245330800886358e-05, - "loss": 0.4634, - "step": 3830 + "learning_rate": 2.527527527527528e-05, + "loss": 0.816, + "step": 3630 }, { "epoch": 32.01, - "learning_rate": 2.51661918328585e-05, - "loss": 0.7509, - "step": 3840 + "learning_rate": 2.519185852519186e-05, + "loss": 0.7641, + "step": 3640 }, { "epoch": 32.01, - "learning_rate": 2.5087052864830645e-05, - "loss": 0.5975, - "step": 3850 + "learning_rate": 2.5108441775108443e-05, + "loss": 0.7681, + "step": 3650 }, { "epoch": 32.01, - "learning_rate": 2.5007913896802788e-05, - "loss": 0.6689, - "step": 3860 + "learning_rate": 2.502502502502503e-05, + "loss": 0.9274, + "step": 3660 }, { "epoch": 32.01, - "learning_rate": 2.492877492877493e-05, - "loss": 0.4826, - "step": 3870 + "learning_rate": 2.494160827494161e-05, + "loss": 0.8763, + "step": 3670 }, { "epoch": 32.01, - "learning_rate": 2.4849635960747072e-05, - "loss": 0.4976, - "step": 3880 + "learning_rate": 2.4858191524858193e-05, + "loss": 0.5928, + "step": 3680 }, { "epoch": 32.02, - "learning_rate": 2.4770496992719218e-05, - "loss": 0.4242, - "step": 3890 + "learning_rate": 2.4774774774774777e-05, + "loss": 0.7191, + "step": 3690 }, { "epoch": 32.02, - "eval_accuracy": 0.7050691244239631, - "eval_loss": 0.8081349730491638, - "eval_runtime": 298.374, - "eval_samples_per_second": 0.727, - "eval_steps_per_second": 0.184, - "step": 3894 + "eval_accuracy": 0.6497695852534562, + "eval_loss": 1.026401400566101, + "eval_runtime": 280.0325, + "eval_samples_per_second": 0.775, + "eval_steps_per_second": 0.196, + "step": 3696 }, { "epoch": 33.0, "learning_rate": 2.4691358024691357e-05, - "loss": 0.3113, - "step": 3900 + "loss": 0.9832, + "step": 3700 }, { "epoch": 33.0, - "learning_rate": 2.4612219056663502e-05, - "loss": 0.6925, - "step": 3910 + "learning_rate": 2.4607941274607943e-05, + "loss": 0.7523, + "step": 3710 }, { "epoch": 33.0, - "learning_rate": 2.4533080088635644e-05, - "loss": 0.6335, - "step": 3920 + "learning_rate": 2.4524524524524527e-05, + "loss": 0.6381, + "step": 3720 }, { "epoch": 33.01, - "learning_rate": 2.4453941120607787e-05, - "loss": 0.5331, - "step": 3930 + "learning_rate": 2.4441107774441107e-05, + "loss": 0.7641, + "step": 3730 }, { "epoch": 33.01, - "learning_rate": 2.4374802152579932e-05, - "loss": 0.6092, - "step": 3940 + "learning_rate": 2.4357691024357694e-05, + "loss": 0.5699, + "step": 3740 }, { "epoch": 33.01, - "learning_rate": 2.4295663184552074e-05, - "loss": 0.534, - "step": 3950 + "learning_rate": 2.4274274274274274e-05, + "loss": 0.5342, + "step": 3750 }, { "epoch": 33.01, - "learning_rate": 2.4216524216524217e-05, - "loss": 0.4707, - "step": 3960 + "learning_rate": 2.4190857524190857e-05, + "loss": 0.4933, + "step": 3760 }, { "epoch": 33.01, - "learning_rate": 2.4137385248496362e-05, - "loss": 0.5926, - "step": 3970 + "learning_rate": 2.4107440774107444e-05, + "loss": 0.7027, + "step": 3770 }, { "epoch": 33.01, - "learning_rate": 2.4058246280468505e-05, - "loss": 0.2674, - "step": 3980 + "learning_rate": 2.4024024024024024e-05, + "loss": 0.5812, + "step": 3780 }, { "epoch": 33.01, - "learning_rate": 2.3979107312440647e-05, - "loss": 0.9024, - "step": 3990 - }, - { - "epoch": 33.02, - "learning_rate": 2.389996834441279e-05, - "loss": 0.3402, - "step": 4000 + "learning_rate": 2.3940607273940608e-05, + "loss": 0.5228, + "step": 3790 }, { "epoch": 33.02, - "learning_rate": 2.3820829376384935e-05, - "loss": 0.5805, - "step": 4010 + "learning_rate": 2.385719052385719e-05, + "loss": 0.7662, + "step": 3800 }, { "epoch": 33.02, - "eval_accuracy": 0.8202764976958525, - "eval_loss": 0.5598491430282593, - "eval_runtime": 287.693, - "eval_samples_per_second": 0.754, - "eval_steps_per_second": 0.191, - "step": 4012 + "eval_accuracy": 0.6405529953917051, + "eval_loss": 1.0589454174041748, + "eval_runtime": 290.7017, + "eval_samples_per_second": 0.746, + "eval_steps_per_second": 0.189, + "step": 3808 }, { "epoch": 34.0, - "learning_rate": 2.3741690408357077e-05, - "loss": 0.6835, - "step": 4020 + "learning_rate": 2.3773773773773775e-05, + "loss": 0.5944, + "step": 3810 }, { "epoch": 34.0, - "learning_rate": 2.366255144032922e-05, - "loss": 0.6466, - "step": 4030 + "learning_rate": 2.3690357023690358e-05, + "loss": 0.588, + "step": 3820 }, { "epoch": 34.0, - "learning_rate": 2.358341247230136e-05, - "loss": 0.3698, - "step": 4040 + "learning_rate": 2.360694027360694e-05, + "loss": 0.6972, + "step": 3830 }, { - "epoch": 34.01, - "learning_rate": 2.3504273504273504e-05, - "loss": 0.4723, - "step": 4050 + "epoch": 34.0, + "learning_rate": 2.3523523523523525e-05, + "loss": 0.544, + "step": 3840 }, { "epoch": 34.01, - "learning_rate": 2.342513453624565e-05, - "loss": 0.7078, - "step": 4060 + "learning_rate": 2.344010677344011e-05, + "loss": 0.6615, + "step": 3850 }, { "epoch": 34.01, - "learning_rate": 2.334599556821779e-05, - "loss": 0.6876, - "step": 4070 + "learning_rate": 2.3356690023356692e-05, + "loss": 0.5002, + "step": 3860 }, { "epoch": 34.01, - "learning_rate": 2.3266856600189934e-05, - "loss": 0.4859, - "step": 4080 + "learning_rate": 2.3273273273273275e-05, + "loss": 0.5654, + "step": 3870 }, { "epoch": 34.01, - "learning_rate": 2.318771763216208e-05, - "loss": 0.5394, - "step": 4090 + "learning_rate": 2.3189856523189855e-05, + "loss": 0.6728, + "step": 3880 }, { "epoch": 34.01, - "learning_rate": 2.310857866413422e-05, - "loss": 0.4636, - "step": 4100 + "learning_rate": 2.3106439773106442e-05, + "loss": 0.5296, + "step": 3890 }, { "epoch": 34.01, - "learning_rate": 2.3029439696106364e-05, - "loss": 0.5758, - "step": 4110 + "learning_rate": 2.3023023023023026e-05, + "loss": 0.4234, + "step": 3900 }, { "epoch": 34.02, - "learning_rate": 2.295030072807851e-05, - "loss": 0.4957, - "step": 4120 + "learning_rate": 2.2939606272939606e-05, + "loss": 0.5872, + "step": 3910 }, { "epoch": 34.02, - "learning_rate": 2.2871161760050648e-05, - "loss": 0.7064, - "step": 4130 + "learning_rate": 2.2856189522856192e-05, + "loss": 0.7313, + "step": 3920 }, { "epoch": 34.02, - "eval_accuracy": 0.7926267281105991, - "eval_loss": 0.7340723276138306, - "eval_runtime": 254.6894, - "eval_samples_per_second": 0.852, - "eval_steps_per_second": 0.216, - "step": 4130 + "eval_accuracy": 0.5622119815668203, + "eval_loss": 1.5075870752334595, + "eval_runtime": 286.0606, + "eval_samples_per_second": 0.759, + "eval_steps_per_second": 0.192, + "step": 3920 }, { "epoch": 35.0, - "learning_rate": 2.2792022792022794e-05, - "loss": 0.4694, - "step": 4140 + "learning_rate": 2.2772772772772773e-05, + "loss": 0.5481, + "step": 3930 }, { "epoch": 35.0, - "learning_rate": 2.2712883823994936e-05, - "loss": 0.6956, - "step": 4150 + "learning_rate": 2.2689356022689356e-05, + "loss": 0.7286, + "step": 3940 }, { "epoch": 35.0, - "learning_rate": 2.2633744855967078e-05, - "loss": 0.4924, - "step": 4160 - }, - { - "epoch": 35.01, - "learning_rate": 2.2554605887939224e-05, - "loss": 0.7678, - "step": 4170 + "learning_rate": 2.2605939272605943e-05, + "loss": 0.795, + "step": 3950 }, { "epoch": 35.01, - "learning_rate": 2.2475466919911363e-05, - "loss": 0.6094, - "step": 4180 + "learning_rate": 2.2522522522522523e-05, + "loss": 0.7738, + "step": 3960 }, { "epoch": 35.01, - "learning_rate": 2.239632795188351e-05, - "loss": 0.3326, - "step": 4190 + "learning_rate": 2.2439105772439106e-05, + "loss": 0.8864, + "step": 3970 }, { "epoch": 35.01, - "learning_rate": 2.2317188983855654e-05, - "loss": 0.623, - "step": 4200 + "learning_rate": 2.235568902235569e-05, + "loss": 0.3864, + "step": 3980 }, { "epoch": 35.01, - "learning_rate": 2.2238050015827793e-05, - "loss": 0.6507, - "step": 4210 + "learning_rate": 2.2272272272272273e-05, + "loss": 0.6556, + "step": 3990 }, { "epoch": 35.01, - "learning_rate": 2.215891104779994e-05, - "loss": 0.5673, - "step": 4220 + "learning_rate": 2.2188855522188857e-05, + "loss": 0.9211, + "step": 4000 }, { "epoch": 35.01, - "learning_rate": 2.207977207977208e-05, - "loss": 0.9691, - "step": 4230 + "learning_rate": 2.210543877210544e-05, + "loss": 0.6319, + "step": 4010 }, { "epoch": 35.02, - "learning_rate": 2.2000633111744223e-05, - "loss": 0.2534, - "step": 4240 + "learning_rate": 2.2022022022022024e-05, + "loss": 0.6768, + "step": 4020 }, { "epoch": 35.02, - "eval_accuracy": 0.783410138248848, - "eval_loss": 0.6685347557067871, - "eval_runtime": 253.8074, - "eval_samples_per_second": 0.855, - "eval_steps_per_second": 0.217, - "step": 4248 + "learning_rate": 2.1938605271938607e-05, + "loss": 0.7539, + "step": 4030 }, { - "epoch": 36.0, - "learning_rate": 2.192149414371637e-05, - "loss": 0.5305, - "step": 4250 + "epoch": 35.02, + "eval_accuracy": 0.5898617511520737, + "eval_loss": 1.2265080213546753, + "eval_runtime": 266.6298, + "eval_samples_per_second": 0.814, + "eval_steps_per_second": 0.206, + "step": 4032 }, { "epoch": 36.0, - "learning_rate": 2.184235517568851e-05, - "loss": 0.3181, - "step": 4260 + "learning_rate": 2.1855188521855187e-05, + "loss": 0.6197, + "step": 4040 }, { "epoch": 36.0, - "learning_rate": 2.1763216207660653e-05, - "loss": 0.2655, - "step": 4270 + "learning_rate": 2.1771771771771774e-05, + "loss": 0.5972, + "step": 4050 }, { "epoch": 36.0, - "learning_rate": 2.1684077239632795e-05, - "loss": 0.546, - "step": 4280 + "learning_rate": 2.1688355021688357e-05, + "loss": 0.9935, + "step": 4060 }, { "epoch": 36.01, "learning_rate": 2.1604938271604937e-05, - "loss": 0.5166, - "step": 4290 + "loss": 0.6247, + "step": 4070 }, { "epoch": 36.01, - "learning_rate": 2.1525799303577083e-05, - "loss": 0.612, - "step": 4300 + "learning_rate": 2.1521521521521524e-05, + "loss": 0.6728, + "step": 4080 }, { "epoch": 36.01, - "learning_rate": 2.1446660335549225e-05, - "loss": 1.0337, - "step": 4310 + "learning_rate": 2.1438104771438104e-05, + "loss": 0.8943, + "step": 4090 }, { "epoch": 36.01, - "learning_rate": 2.1367521367521368e-05, - "loss": 0.4693, - "step": 4320 + "learning_rate": 2.1354688021354688e-05, + "loss": 0.6704, + "step": 4100 }, { "epoch": 36.01, - "learning_rate": 2.1288382399493513e-05, - "loss": 0.535, - "step": 4330 + "learning_rate": 2.1271271271271275e-05, + "loss": 0.8664, + "step": 4110 }, { "epoch": 36.01, - "learning_rate": 2.1209243431465655e-05, - "loss": 0.2098, - "step": 4340 + "learning_rate": 2.1187854521187855e-05, + "loss": 0.8486, + "step": 4120 }, { "epoch": 36.01, - "learning_rate": 2.1130104463437798e-05, - "loss": 0.5005, - "step": 4350 + "learning_rate": 2.1104437771104438e-05, + "loss": 0.5425, + "step": 4130 }, { "epoch": 36.02, - "learning_rate": 2.105096549540994e-05, - "loss": 0.7578, - "step": 4360 + "learning_rate": 2.102102102102102e-05, + "loss": 0.571, + "step": 4140 }, { "epoch": 36.02, - "eval_accuracy": 0.7603686635944701, - "eval_loss": 0.7591729760169983, - "eval_runtime": 256.6972, - "eval_samples_per_second": 0.845, - "eval_steps_per_second": 0.214, - "step": 4366 - }, - { - "epoch": 37.0, - "learning_rate": 2.0971826527382085e-05, - "loss": 0.7535, - "step": 4370 + "eval_accuracy": 0.6267281105990783, + "eval_loss": 1.1598420143127441, + "eval_runtime": 288.0962, + "eval_samples_per_second": 0.753, + "eval_steps_per_second": 0.191, + "step": 4144 }, { "epoch": 37.0, - "learning_rate": 2.0892687559354228e-05, - "loss": 0.5906, - "step": 4380 + "learning_rate": 2.0937604270937605e-05, + "loss": 0.2915, + "step": 4150 }, { "epoch": 37.0, - "learning_rate": 2.081354859132637e-05, - "loss": 0.5422, - "step": 4390 + "learning_rate": 2.085418752085419e-05, + "loss": 0.5187, + "step": 4160 }, { "epoch": 37.0, - "learning_rate": 2.0734409623298512e-05, - "loss": 0.4581, - "step": 4400 + "learning_rate": 2.0770770770770772e-05, + "loss": 0.4894, + "step": 4170 }, { "epoch": 37.01, - "learning_rate": 2.0655270655270654e-05, - "loss": 0.4551, - "step": 4410 + "learning_rate": 2.0687354020687355e-05, + "loss": 0.522, + "step": 4180 }, { "epoch": 37.01, - "learning_rate": 2.05761316872428e-05, - "loss": 0.528, - "step": 4420 + "learning_rate": 2.060393727060394e-05, + "loss": 0.3428, + "step": 4190 }, { "epoch": 37.01, - "learning_rate": 2.0496992719214942e-05, - "loss": 0.2991, - "step": 4430 + "learning_rate": 2.0520520520520522e-05, + "loss": 0.4749, + "step": 4200 }, { "epoch": 37.01, - "learning_rate": 2.0417853751187084e-05, - "loss": 0.8417, - "step": 4440 + "learning_rate": 2.0437103770437106e-05, + "loss": 0.9899, + "step": 4210 }, { "epoch": 37.01, - "learning_rate": 2.033871478315923e-05, - "loss": 0.924, - "step": 4450 + "learning_rate": 2.0353687020353686e-05, + "loss": 0.7875, + "step": 4220 }, { "epoch": 37.01, - "learning_rate": 2.0259575815131372e-05, - "loss": 0.5173, - "step": 4460 + "learning_rate": 2.0270270270270273e-05, + "loss": 0.4509, + "step": 4230 }, { "epoch": 37.01, - "learning_rate": 2.0180436847103515e-05, - "loss": 0.3303, - "step": 4470 + "learning_rate": 2.0186853520186856e-05, + "loss": 0.5523, + "step": 4240 }, { "epoch": 37.02, - "learning_rate": 2.010129787907566e-05, - "loss": 0.5822, - "step": 4480 + "learning_rate": 2.0103436770103436e-05, + "loss": 0.3404, + "step": 4250 }, { "epoch": 37.02, - "eval_accuracy": 0.728110599078341, - "eval_loss": 0.9471691250801086, - "eval_runtime": 259.2305, - "eval_samples_per_second": 0.837, - "eval_steps_per_second": 0.212, - "step": 4484 + "eval_accuracy": 0.6359447004608295, + "eval_loss": 1.0306791067123413, + "eval_runtime": 260.5195, + "eval_samples_per_second": 0.833, + "eval_steps_per_second": 0.211, + "step": 4256 }, { "epoch": 38.0, - "learning_rate": 2.00221589110478e-05, - "loss": 0.3202, - "step": 4490 + "learning_rate": 2.0020020020020023e-05, + "loss": 0.6409, + "step": 4260 }, { "epoch": 38.0, - "learning_rate": 1.9943019943019945e-05, - "loss": 0.4554, - "step": 4500 + "learning_rate": 1.9936603269936603e-05, + "loss": 0.9606, + "step": 4270 }, { "epoch": 38.0, - "learning_rate": 1.9863880974992087e-05, - "loss": 0.5358, - "step": 4510 + "learning_rate": 1.9853186519853186e-05, + "loss": 0.5896, + "step": 4280 }, { "epoch": 38.01, - "learning_rate": 1.978474200696423e-05, - "loss": 0.3341, - "step": 4520 + "learning_rate": 1.9769769769769773e-05, + "loss": 1.0073, + "step": 4290 }, { "epoch": 38.01, - "learning_rate": 1.9705603038936375e-05, - "loss": 1.1299, - "step": 4530 + "learning_rate": 1.9686353019686353e-05, + "loss": 0.7469, + "step": 4300 }, { "epoch": 38.01, - "learning_rate": 1.9626464070908514e-05, - "loss": 0.4959, - "step": 4540 + "learning_rate": 1.9602936269602937e-05, + "loss": 0.9638, + "step": 4310 }, { "epoch": 38.01, - "learning_rate": 1.954732510288066e-05, - "loss": 0.2519, - "step": 4550 + "learning_rate": 1.951951951951952e-05, + "loss": 0.2893, + "step": 4320 }, { "epoch": 38.01, - "learning_rate": 1.9468186134852805e-05, - "loss": 0.9426, - "step": 4560 + "learning_rate": 1.9436102769436104e-05, + "loss": 0.9141, + "step": 4330 }, { "epoch": 38.01, - "learning_rate": 1.9389047166824944e-05, - "loss": 0.3135, - "step": 4570 + "learning_rate": 1.9352686019352687e-05, + "loss": 0.4846, + "step": 4340 }, { "epoch": 38.01, - "learning_rate": 1.930990819879709e-05, - "loss": 0.528, - "step": 4580 - }, - { - "epoch": 38.02, - "learning_rate": 1.923076923076923e-05, - "loss": 0.8759, - "step": 4590 + "learning_rate": 1.9269269269269267e-05, + "loss": 0.2622, + "step": 4350 }, { "epoch": 38.02, - "learning_rate": 1.9151630262741374e-05, - "loss": 0.2939, - "step": 4600 + "learning_rate": 1.9185852519185854e-05, + "loss": 0.5553, + "step": 4360 }, { "epoch": 38.02, - "eval_accuracy": 0.728110599078341, - "eval_loss": 0.8887839317321777, - "eval_runtime": 260.522, - "eval_samples_per_second": 0.833, - "eval_steps_per_second": 0.211, - "step": 4602 + "eval_accuracy": 0.7235023041474654, + "eval_loss": 0.8180494904518127, + "eval_runtime": 302.1979, + "eval_samples_per_second": 0.718, + "eval_steps_per_second": 0.182, + "step": 4368 }, { "epoch": 39.0, - "learning_rate": 1.907249129471352e-05, - "loss": 0.7159, - "step": 4610 + "learning_rate": 1.9102435769102438e-05, + "loss": 0.7832, + "step": 4370 }, { "epoch": 39.0, - "learning_rate": 1.899335232668566e-05, - "loss": 0.7495, - "step": 4620 + "learning_rate": 1.9019019019019018e-05, + "loss": 0.7238, + "step": 4380 }, { "epoch": 39.0, - "learning_rate": 1.8914213358657804e-05, - "loss": 0.6435, - "step": 4630 + "learning_rate": 1.8935602268935604e-05, + "loss": 0.2628, + "step": 4390 }, { - "epoch": 39.01, - "learning_rate": 1.8835074390629946e-05, - "loss": 0.5865, - "step": 4640 + "epoch": 39.0, + "learning_rate": 1.8852185518852188e-05, + "loss": 0.7441, + "step": 4400 }, { "epoch": 39.01, - "learning_rate": 1.8755935422602088e-05, - "loss": 0.5622, - "step": 4650 + "learning_rate": 1.8768768768768768e-05, + "loss": 0.4422, + "step": 4410 }, { "epoch": 39.01, - "learning_rate": 1.8676796454574234e-05, - "loss": 0.4081, - "step": 4660 + "learning_rate": 1.8685352018685355e-05, + "loss": 0.5266, + "step": 4420 }, { "epoch": 39.01, - "learning_rate": 1.8597657486546376e-05, - "loss": 0.5133, - "step": 4670 + "learning_rate": 1.8601935268601935e-05, + "loss": 0.5469, + "step": 4430 }, { "epoch": 39.01, "learning_rate": 1.8518518518518518e-05, - "loss": 0.5643, - "step": 4680 + "loss": 0.9752, + "step": 4440 }, { "epoch": 39.01, - "learning_rate": 1.8439379550490664e-05, - "loss": 0.5888, - "step": 4690 + "learning_rate": 1.8435101768435105e-05, + "loss": 1.0334, + "step": 4450 }, { "epoch": 39.01, - "learning_rate": 1.8360240582462806e-05, - "loss": 0.7294, - "step": 4700 + "learning_rate": 1.8351685018351685e-05, + "loss": 0.604, + "step": 4460 }, { "epoch": 39.02, - "learning_rate": 1.828110161443495e-05, - "loss": 0.4845, - "step": 4710 + "learning_rate": 1.826826826826827e-05, + "loss": 0.7227, + "step": 4470 }, { "epoch": 39.02, - "learning_rate": 1.820196264640709e-05, - "loss": 0.4795, - "step": 4720 + "learning_rate": 1.8184851518184852e-05, + "loss": 0.8499, + "step": 4480 }, { "epoch": 39.02, - "eval_accuracy": 0.663594470046083, - "eval_loss": 1.0767979621887207, - "eval_runtime": 260.495, - "eval_samples_per_second": 0.833, - "eval_steps_per_second": 0.211, - "step": 4720 + "eval_accuracy": 0.6497695852534562, + "eval_loss": 1.0074414014816284, + "eval_runtime": 296.0859, + "eval_samples_per_second": 0.733, + "eval_steps_per_second": 0.186, + "step": 4480 }, { "epoch": 40.0, - "learning_rate": 1.8122823678379236e-05, - "loss": 0.3626, - "step": 4730 + "learning_rate": 1.8101434768101436e-05, + "loss": 0.3984, + "step": 4490 }, { "epoch": 40.0, - "learning_rate": 1.804368471035138e-05, - "loss": 0.5294, - "step": 4740 + "learning_rate": 1.801801801801802e-05, + "loss": 0.5711, + "step": 4500 }, { "epoch": 40.0, - "learning_rate": 1.796454574232352e-05, - "loss": 0.8263, - "step": 4750 - }, - { - "epoch": 40.01, - "learning_rate": 1.7885406774295663e-05, - "loss": 0.6514, - "step": 4760 + "learning_rate": 1.7934601267934602e-05, + "loss": 0.5289, + "step": 4510 }, { "epoch": 40.01, - "learning_rate": 1.7806267806267805e-05, - "loss": 0.5644, - "step": 4770 + "learning_rate": 1.7851184517851186e-05, + "loss": 0.7913, + "step": 4520 }, { "epoch": 40.01, - "learning_rate": 1.772712883823995e-05, - "loss": 0.5152, - "step": 4780 + "learning_rate": 1.776776776776777e-05, + "loss": 0.6343, + "step": 4530 }, { "epoch": 40.01, - "learning_rate": 1.7647989870212093e-05, - "loss": 0.5133, - "step": 4790 + "learning_rate": 1.7684351017684353e-05, + "loss": 0.5266, + "step": 4540 }, { "epoch": 40.01, - "learning_rate": 1.7568850902184235e-05, - "loss": 0.6261, - "step": 4800 + "learning_rate": 1.7600934267600936e-05, + "loss": 0.5792, + "step": 4550 }, { "epoch": 40.01, - "learning_rate": 1.748971193415638e-05, - "loss": 0.8368, - "step": 4810 + "learning_rate": 1.7517517517517516e-05, + "loss": 0.8629, + "step": 4560 }, { "epoch": 40.01, - "learning_rate": 1.7410572966128523e-05, - "loss": 0.5701, - "step": 4820 + "learning_rate": 1.7434100767434103e-05, + "loss": 0.7761, + "step": 4570 }, { "epoch": 40.02, - "learning_rate": 1.7331433998100665e-05, - "loss": 0.4038, - "step": 4830 + "learning_rate": 1.7350684017350687e-05, + "loss": 0.7784, + "step": 4580 }, { "epoch": 40.02, - "eval_accuracy": 0.8064516129032258, - "eval_loss": 0.6451985836029053, - "eval_runtime": 261.7369, - "eval_samples_per_second": 0.829, - "eval_steps_per_second": 0.21, - "step": 4838 + "learning_rate": 1.7267267267267267e-05, + "loss": 0.5036, + "step": 4590 }, { - "epoch": 41.0, - "learning_rate": 1.725229503007281e-05, - "loss": 0.4527, - "step": 4840 + "epoch": 40.02, + "eval_accuracy": 0.631336405529954, + "eval_loss": 1.1159977912902832, + "eval_runtime": 266.794, + "eval_samples_per_second": 0.813, + "eval_steps_per_second": 0.206, + "step": 4592 }, { "epoch": 41.0, - "learning_rate": 1.717315606204495e-05, - "loss": 0.9572, - "step": 4850 + "learning_rate": 1.7183850517183853e-05, + "loss": 0.8591, + "step": 4600 }, { "epoch": 41.0, - "learning_rate": 1.7094017094017095e-05, - "loss": 0.5401, - "step": 4860 + "learning_rate": 1.7100433767100434e-05, + "loss": 0.4294, + "step": 4610 }, { "epoch": 41.0, - "learning_rate": 1.7014878125989238e-05, - "loss": 0.3303, - "step": 4870 + "learning_rate": 1.7017017017017017e-05, + "loss": 0.5119, + "step": 4620 }, { "epoch": 41.01, - "learning_rate": 1.693573915796138e-05, - "loss": 0.6231, - "step": 4880 + "learning_rate": 1.6933600266933604e-05, + "loss": 0.3545, + "step": 4630 }, { "epoch": 41.01, - "learning_rate": 1.6856600189933525e-05, - "loss": 0.4519, - "step": 4890 + "learning_rate": 1.6850183516850184e-05, + "loss": 0.7615, + "step": 4640 }, { "epoch": 41.01, - "learning_rate": 1.6777461221905664e-05, - "loss": 0.6937, - "step": 4900 + "learning_rate": 1.6766766766766767e-05, + "loss": 0.1783, + "step": 4650 }, { "epoch": 41.01, - "learning_rate": 1.669832225387781e-05, - "loss": 0.4248, - "step": 4910 + "learning_rate": 1.668335001668335e-05, + "loss": 0.6125, + "step": 4660 }, { "epoch": 41.01, - "learning_rate": 1.6619183285849956e-05, - "loss": 0.6655, - "step": 4920 + "learning_rate": 1.6599933266599934e-05, + "loss": 1.0295, + "step": 4670 }, { "epoch": 41.01, - "learning_rate": 1.6540044317822094e-05, - "loss": 0.5651, - "step": 4930 + "learning_rate": 1.6516516516516518e-05, + "loss": 0.6956, + "step": 4680 }, { "epoch": 41.01, - "learning_rate": 1.646090534979424e-05, - "loss": 0.6075, - "step": 4940 + "learning_rate": 1.6433099766433098e-05, + "loss": 0.6383, + "step": 4690 }, { "epoch": 41.02, - "learning_rate": 1.6381766381766382e-05, - "loss": 0.8347, - "step": 4950 + "learning_rate": 1.6349683016349685e-05, + "loss": 0.814, + "step": 4700 }, { "epoch": 41.02, - "eval_accuracy": 0.7926267281105991, - "eval_loss": 0.7040404677391052, - "eval_runtime": 257.2136, - "eval_samples_per_second": 0.844, - "eval_steps_per_second": 0.214, - "step": 4956 - }, - { - "epoch": 42.0, - "learning_rate": 1.6302627413738524e-05, - "loss": 0.7086, - "step": 4960 + "eval_accuracy": 0.695852534562212, + "eval_loss": 0.903153657913208, + "eval_runtime": 298.2312, + "eval_samples_per_second": 0.728, + "eval_steps_per_second": 0.184, + "step": 4704 }, { "epoch": 42.0, - "learning_rate": 1.622348844571067e-05, - "loss": 0.6848, - "step": 4970 + "learning_rate": 1.6266266266266268e-05, + "loss": 0.5097, + "step": 4710 }, { "epoch": 42.0, - "learning_rate": 1.6144349477682812e-05, - "loss": 0.718, - "step": 4980 + "learning_rate": 1.6182849516182848e-05, + "loss": 0.4471, + "step": 4720 }, { "epoch": 42.0, - "learning_rate": 1.6065210509654955e-05, - "loss": 0.3821, - "step": 4990 + "learning_rate": 1.6099432766099435e-05, + "loss": 0.4364, + "step": 4730 }, { "epoch": 42.01, - "learning_rate": 1.5986071541627097e-05, - "loss": 0.488, - "step": 5000 + "learning_rate": 1.6016016016016015e-05, + "loss": 0.5641, + "step": 4740 }, { "epoch": 42.01, - "learning_rate": 1.590693257359924e-05, - "loss": 0.3441, - "step": 5010 + "learning_rate": 1.59325992659326e-05, + "loss": 0.8213, + "step": 4750 }, { "epoch": 42.01, - "learning_rate": 1.5827793605571385e-05, - "loss": 0.5323, - "step": 5020 + "learning_rate": 1.5849182515849185e-05, + "loss": 0.7006, + "step": 4760 }, { "epoch": 42.01, - "learning_rate": 1.5748654637543527e-05, - "loss": 0.3723, - "step": 5030 + "learning_rate": 1.5765765765765765e-05, + "loss": 0.5187, + "step": 4770 }, { "epoch": 42.01, - "learning_rate": 1.566951566951567e-05, - "loss": 0.5536, - "step": 5040 + "learning_rate": 1.568234901568235e-05, + "loss": 0.3012, + "step": 4780 }, { "epoch": 42.01, - "learning_rate": 1.5590376701487815e-05, - "loss": 0.2562, - "step": 5050 + "learning_rate": 1.5598932265598936e-05, + "loss": 0.7188, + "step": 4790 }, { "epoch": 42.01, - "learning_rate": 1.5511237733459957e-05, - "loss": 0.4913, - "step": 5060 + "learning_rate": 1.5515515515515516e-05, + "loss": 0.5198, + "step": 4800 }, { "epoch": 42.02, "learning_rate": 1.54320987654321e-05, - "loss": 0.4113, - "step": 5070 + "loss": 0.7293, + "step": 4810 }, { "epoch": 42.02, - "eval_accuracy": 0.7373271889400922, - "eval_loss": 0.8011646866798401, - "eval_runtime": 252.0655, - "eval_samples_per_second": 0.861, - "eval_steps_per_second": 0.218, - "step": 5074 + "eval_accuracy": 0.728110599078341, + "eval_loss": 0.9331218004226685, + "eval_runtime": 258.6829, + "eval_samples_per_second": 0.839, + "eval_steps_per_second": 0.213, + "step": 4816 }, { "epoch": 43.0, - "learning_rate": 1.535295979740424e-05, - "loss": 0.1942, - "step": 5080 + "learning_rate": 1.5348682015348683e-05, + "loss": 0.4944, + "step": 4820 }, { "epoch": 43.0, - "learning_rate": 1.5273820829376387e-05, - "loss": 0.6847, - "step": 5090 + "learning_rate": 1.5265265265265266e-05, + "loss": 0.4447, + "step": 4830 }, { "epoch": 43.0, - "learning_rate": 1.5194681861348528e-05, - "loss": 0.5127, - "step": 5100 + "learning_rate": 1.5181848515181848e-05, + "loss": 0.3132, + "step": 4840 }, { "epoch": 43.01, - "learning_rate": 1.5115542893320671e-05, - "loss": 0.18, - "step": 5110 + "learning_rate": 1.5098431765098433e-05, + "loss": 0.5882, + "step": 4850 }, { "epoch": 43.01, - "learning_rate": 1.5036403925292817e-05, - "loss": 0.6956, - "step": 5120 + "learning_rate": 1.5015015015015016e-05, + "loss": 0.7151, + "step": 4860 }, { "epoch": 43.01, - "learning_rate": 1.4957264957264958e-05, - "loss": 0.7358, - "step": 5130 + "learning_rate": 1.4931598264931598e-05, + "loss": 0.7703, + "step": 4870 }, { "epoch": 43.01, - "learning_rate": 1.4878125989237102e-05, - "loss": 0.7034, - "step": 5140 + "learning_rate": 1.4848181514848183e-05, + "loss": 0.8505, + "step": 4880 }, { "epoch": 43.01, - "learning_rate": 1.4798987021209244e-05, - "loss": 0.5671, - "step": 5150 + "learning_rate": 1.4764764764764765e-05, + "loss": 0.3957, + "step": 4890 }, { "epoch": 43.01, - "learning_rate": 1.4719848053181388e-05, - "loss": 0.276, - "step": 5160 + "learning_rate": 1.4681348014681348e-05, + "loss": 0.5204, + "step": 4900 }, { "epoch": 43.01, - "learning_rate": 1.4640709085153532e-05, - "loss": 0.5131, - "step": 5170 - }, - { - "epoch": 43.02, - "learning_rate": 1.4561570117125672e-05, - "loss": 0.6712, - "step": 5180 + "learning_rate": 1.4597931264597934e-05, + "loss": 0.4383, + "step": 4910 }, { "epoch": 43.02, - "learning_rate": 1.4482431149097816e-05, - "loss": 0.3681, - "step": 5190 + "learning_rate": 1.4514514514514515e-05, + "loss": 0.4402, + "step": 4920 }, { "epoch": 43.02, - "eval_accuracy": 0.7880184331797235, - "eval_loss": 0.762200117111206, - "eval_runtime": 254.5668, - "eval_samples_per_second": 0.852, - "eval_steps_per_second": 0.216, - "step": 5192 + "eval_accuracy": 0.5668202764976958, + "eval_loss": 1.4190495014190674, + "eval_runtime": 275.5299, + "eval_samples_per_second": 0.788, + "eval_steps_per_second": 0.2, + "step": 4928 }, { "epoch": 44.0, - "learning_rate": 1.440329218106996e-05, - "loss": 0.4388, - "step": 5200 + "learning_rate": 1.4431097764431097e-05, + "loss": 0.2849, + "step": 4930 }, { "epoch": 44.0, - "learning_rate": 1.4324153213042102e-05, - "loss": 0.4731, - "step": 5210 + "learning_rate": 1.4347681014347682e-05, + "loss": 0.5245, + "step": 4940 }, { "epoch": 44.0, - "learning_rate": 1.4245014245014246e-05, - "loss": 0.4798, - "step": 5220 + "learning_rate": 1.4264264264264266e-05, + "loss": 0.4884, + "step": 4950 }, { - "epoch": 44.01, - "learning_rate": 1.416587527698639e-05, - "loss": 0.3783, - "step": 5230 + "epoch": 44.0, + "learning_rate": 1.4180847514180847e-05, + "loss": 0.3824, + "step": 4960 }, { "epoch": 44.01, - "learning_rate": 1.408673630895853e-05, - "loss": 0.2336, - "step": 5240 + "learning_rate": 1.4097430764097431e-05, + "loss": 0.5175, + "step": 4970 }, { "epoch": 44.01, - "learning_rate": 1.4007597340930676e-05, - "loss": 0.3797, - "step": 5250 + "learning_rate": 1.4014014014014016e-05, + "loss": 0.5526, + "step": 4980 }, { "epoch": 44.01, - "learning_rate": 1.3928458372902817e-05, - "loss": 0.4423, - "step": 5260 + "learning_rate": 1.3930597263930598e-05, + "loss": 0.4134, + "step": 4990 }, { "epoch": 44.01, - "learning_rate": 1.384931940487496e-05, - "loss": 0.5598, - "step": 5270 + "learning_rate": 1.384718051384718e-05, + "loss": 0.4121, + "step": 5000 }, { "epoch": 44.01, - "learning_rate": 1.3770180436847105e-05, - "loss": 0.5773, - "step": 5280 + "learning_rate": 1.3763763763763765e-05, + "loss": 0.7849, + "step": 5010 }, { "epoch": 44.01, - "learning_rate": 1.3691041468819247e-05, - "loss": 0.3113, - "step": 5290 + "learning_rate": 1.3680347013680348e-05, + "loss": 0.7587, + "step": 5020 }, { "epoch": 44.02, - "learning_rate": 1.361190250079139e-05, - "loss": 0.2841, - "step": 5300 + "learning_rate": 1.359693026359693e-05, + "loss": 0.4583, + "step": 5030 }, { "epoch": 44.02, - "learning_rate": 1.3532763532763535e-05, - "loss": 1.0092, - "step": 5310 + "learning_rate": 1.3513513513513515e-05, + "loss": 0.4625, + "step": 5040 }, { "epoch": 44.02, - "eval_accuracy": 0.7880184331797235, - "eval_loss": 0.7931644916534424, - "eval_runtime": 259.7621, - "eval_samples_per_second": 0.835, - "eval_steps_per_second": 0.212, - "step": 5310 + "eval_accuracy": 0.7004608294930875, + "eval_loss": 1.0268219709396362, + "eval_runtime": 299.0881, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.184, + "step": 5040 }, { "epoch": 45.0, - "learning_rate": 1.3453624564735675e-05, - "loss": 0.6327, - "step": 5320 + "learning_rate": 1.3430096763430097e-05, + "loss": 0.4425, + "step": 5050 }, { "epoch": 45.0, - "learning_rate": 1.3374485596707819e-05, - "loss": 1.0639, - "step": 5330 + "learning_rate": 1.334668001334668e-05, + "loss": 0.6364, + "step": 5060 }, { "epoch": 45.0, - "learning_rate": 1.3295346628679963e-05, - "loss": 0.2824, - "step": 5340 - }, - { - "epoch": 45.01, - "learning_rate": 1.3216207660652105e-05, - "loss": 0.3033, - "step": 5350 + "learning_rate": 1.3263263263263265e-05, + "loss": 0.386, + "step": 5070 }, { "epoch": 45.01, - "learning_rate": 1.313706869262425e-05, - "loss": 0.5596, - "step": 5360 + "learning_rate": 1.3179846513179847e-05, + "loss": 0.6462, + "step": 5080 }, { "epoch": 45.01, - "learning_rate": 1.305792972459639e-05, - "loss": 0.4285, - "step": 5370 + "learning_rate": 1.3096429763096429e-05, + "loss": 0.8925, + "step": 5090 }, { "epoch": 45.01, - "learning_rate": 1.2978790756568535e-05, - "loss": 0.7668, - "step": 5380 + "learning_rate": 1.3013013013013014e-05, + "loss": 0.6358, + "step": 5100 }, { "epoch": 45.01, - "learning_rate": 1.289965178854068e-05, - "loss": 0.6117, - "step": 5390 + "learning_rate": 1.2929596262929598e-05, + "loss": 0.8605, + "step": 5110 }, { "epoch": 45.01, - "learning_rate": 1.282051282051282e-05, - "loss": 0.5597, - "step": 5400 + "learning_rate": 1.284617951284618e-05, + "loss": 0.9282, + "step": 5120 }, { "epoch": 45.01, - "learning_rate": 1.2741373852484964e-05, - "loss": 0.2782, - "step": 5410 + "learning_rate": 1.2762762762762764e-05, + "loss": 0.7836, + "step": 5130 }, { "epoch": 45.02, - "learning_rate": 1.2662234884457108e-05, - "loss": 0.321, - "step": 5420 + "learning_rate": 1.2679346012679346e-05, + "loss": 0.4875, + "step": 5140 }, { "epoch": 45.02, - "eval_accuracy": 0.7373271889400922, - "eval_loss": 0.9068748354911804, - "eval_runtime": 259.5409, - "eval_samples_per_second": 0.836, - "eval_steps_per_second": 0.212, - "step": 5428 + "learning_rate": 1.259592926259593e-05, + "loss": 0.2266, + "step": 5150 }, { - "epoch": 46.0, - "learning_rate": 1.258309591642925e-05, - "loss": 0.5204, - "step": 5430 + "epoch": 45.02, + "eval_accuracy": 0.6405529953917051, + "eval_loss": 1.2808195352554321, + "eval_runtime": 257.9463, + "eval_samples_per_second": 0.841, + "eval_steps_per_second": 0.213, + "step": 5152 }, { "epoch": 46.0, - "learning_rate": 1.2503956948401394e-05, - "loss": 0.5057, - "step": 5440 + "learning_rate": 1.2512512512512515e-05, + "loss": 0.6837, + "step": 5160 }, { "epoch": 46.0, - "learning_rate": 1.2424817980373536e-05, - "loss": 0.5511, - "step": 5450 + "learning_rate": 1.2429095762429097e-05, + "loss": 0.3609, + "step": 5170 }, { "epoch": 46.0, "learning_rate": 1.2345679012345678e-05, - "loss": 0.6262, - "step": 5460 + "loss": 0.7921, + "step": 5180 }, { "epoch": 46.01, - "learning_rate": 1.2266540044317822e-05, - "loss": 0.3925, - "step": 5470 + "learning_rate": 1.2262262262262263e-05, + "loss": 1.2116, + "step": 5190 }, { "epoch": 46.01, - "learning_rate": 1.2187401076289966e-05, - "loss": 0.5744, - "step": 5480 + "learning_rate": 1.2178845512178847e-05, + "loss": 0.6612, + "step": 5200 }, { "epoch": 46.01, - "learning_rate": 1.2108262108262108e-05, - "loss": 0.4216, - "step": 5490 + "learning_rate": 1.2095428762095429e-05, + "loss": 0.2317, + "step": 5210 }, { "epoch": 46.01, - "learning_rate": 1.2029123140234252e-05, - "loss": 0.6228, - "step": 5500 + "learning_rate": 1.2012012012012012e-05, + "loss": 0.4213, + "step": 5220 }, { "epoch": 46.01, - "learning_rate": 1.1949984172206395e-05, - "loss": 0.3394, - "step": 5510 + "learning_rate": 1.1928595261928596e-05, + "loss": 0.8834, + "step": 5230 }, { "epoch": 46.01, - "learning_rate": 1.1870845204178538e-05, - "loss": 0.7777, - "step": 5520 + "learning_rate": 1.1845178511845179e-05, + "loss": 0.8265, + "step": 5240 }, { "epoch": 46.01, - "learning_rate": 1.179170623615068e-05, - "loss": 0.3025, - "step": 5530 + "learning_rate": 1.1761761761761762e-05, + "loss": 0.1315, + "step": 5250 }, { "epoch": 46.02, - "learning_rate": 1.1712567268122825e-05, - "loss": 0.399, - "step": 5540 + "learning_rate": 1.1678345011678346e-05, + "loss": 0.7424, + "step": 5260 }, { "epoch": 46.02, - "eval_accuracy": 0.8110599078341014, - "eval_loss": 0.6439275741577148, - "eval_runtime": 263.8811, - "eval_samples_per_second": 0.822, - "eval_steps_per_second": 0.208, - "step": 5546 - }, - { - "epoch": 47.0, - "learning_rate": 1.1633428300094967e-05, - "loss": 0.6139, - "step": 5550 + "eval_accuracy": 0.6497695852534562, + "eval_loss": 1.1821485757827759, + "eval_runtime": 277.4384, + "eval_samples_per_second": 0.782, + "eval_steps_per_second": 0.198, + "step": 5264 }, { "epoch": 47.0, - "learning_rate": 1.155428933206711e-05, - "loss": 0.4756, - "step": 5560 + "learning_rate": 1.1594928261594928e-05, + "loss": 0.7674, + "step": 5270 }, { "epoch": 47.0, - "learning_rate": 1.1475150364039255e-05, - "loss": 0.4553, - "step": 5570 + "learning_rate": 1.1511511511511513e-05, + "loss": 0.8093, + "step": 5280 }, { "epoch": 47.0, - "learning_rate": 1.1396011396011397e-05, - "loss": 0.4806, - "step": 5580 + "learning_rate": 1.1428094761428096e-05, + "loss": 0.3749, + "step": 5290 }, { "epoch": 47.01, - "learning_rate": 1.1316872427983539e-05, - "loss": 0.6994, - "step": 5590 + "learning_rate": 1.1344678011344678e-05, + "loss": 0.572, + "step": 5300 }, { "epoch": 47.01, - "learning_rate": 1.1237733459955681e-05, - "loss": 0.6582, - "step": 5600 + "learning_rate": 1.1261261261261261e-05, + "loss": 0.4389, + "step": 5310 }, { "epoch": 47.01, - "learning_rate": 1.1158594491927827e-05, - "loss": 0.241, - "step": 5610 + "learning_rate": 1.1177844511177845e-05, + "loss": 0.421, + "step": 5320 }, { "epoch": 47.01, - "learning_rate": 1.107945552389997e-05, - "loss": 0.6311, - "step": 5620 + "learning_rate": 1.1094427761094428e-05, + "loss": 0.7683, + "step": 5330 }, { "epoch": 47.01, - "learning_rate": 1.1000316555872111e-05, - "loss": 0.8412, - "step": 5630 + "learning_rate": 1.1011011011011012e-05, + "loss": 0.333, + "step": 5340 }, { "epoch": 47.01, - "learning_rate": 1.0921177587844255e-05, - "loss": 0.2699, - "step": 5640 + "learning_rate": 1.0927594260927594e-05, + "loss": 0.8005, + "step": 5350 }, { "epoch": 47.01, - "learning_rate": 1.0842038619816398e-05, - "loss": 0.3901, - "step": 5650 + "learning_rate": 1.0844177510844179e-05, + "loss": 0.5426, + "step": 5360 }, { "epoch": 47.02, - "learning_rate": 1.0762899651788542e-05, - "loss": 0.3699, - "step": 5660 + "learning_rate": 1.0760760760760762e-05, + "loss": 0.4852, + "step": 5370 }, { "epoch": 47.02, - "eval_accuracy": 0.7695852534562212, - "eval_loss": 0.7740164399147034, - "eval_runtime": 266.667, - "eval_samples_per_second": 0.814, - "eval_steps_per_second": 0.206, - "step": 5664 + "eval_accuracy": 0.6589861751152074, + "eval_loss": 1.2433668375015259, + "eval_runtime": 303.2594, + "eval_samples_per_second": 0.716, + "eval_steps_per_second": 0.181, + "step": 5376 }, { "epoch": 48.0, - "learning_rate": 1.0683760683760684e-05, - "loss": 0.8451, - "step": 5670 + "learning_rate": 1.0677344010677344e-05, + "loss": 0.9861, + "step": 5380 }, { "epoch": 48.0, - "learning_rate": 1.0604621715732828e-05, - "loss": 0.5939, - "step": 5680 + "learning_rate": 1.0593927260593927e-05, + "loss": 0.4832, + "step": 5390 }, { "epoch": 48.0, - "learning_rate": 1.052548274770497e-05, - "loss": 0.7695, - "step": 5690 + "learning_rate": 1.051051051051051e-05, + "loss": 0.3998, + "step": 5400 }, { "epoch": 48.01, - "learning_rate": 1.0446343779677114e-05, - "loss": 0.5514, - "step": 5700 + "learning_rate": 1.0427093760427094e-05, + "loss": 0.599, + "step": 5410 }, { "epoch": 48.01, - "learning_rate": 1.0367204811649256e-05, - "loss": 0.4196, - "step": 5710 + "learning_rate": 1.0343677010343678e-05, + "loss": 0.6899, + "step": 5420 }, { "epoch": 48.01, - "learning_rate": 1.02880658436214e-05, - "loss": 0.2501, - "step": 5720 + "learning_rate": 1.0260260260260261e-05, + "loss": 0.618, + "step": 5430 }, { "epoch": 48.01, - "learning_rate": 1.0208926875593542e-05, - "loss": 0.6282, - "step": 5730 + "learning_rate": 1.0176843510176843e-05, + "loss": 0.5096, + "step": 5440 }, { "epoch": 48.01, - "learning_rate": 1.0129787907565686e-05, - "loss": 0.5634, - "step": 5740 + "learning_rate": 1.0093426760093428e-05, + "loss": 0.379, + "step": 5450 }, { "epoch": 48.01, - "learning_rate": 1.005064893953783e-05, - "loss": 0.3623, - "step": 5750 + "learning_rate": 1.0010010010010011e-05, + "loss": 0.5432, + "step": 5460 }, { "epoch": 48.01, - "learning_rate": 9.971509971509972e-06, - "loss": 0.4048, - "step": 5760 - }, - { - "epoch": 48.02, - "learning_rate": 9.892371003482115e-06, - "loss": 0.5855, - "step": 5770 + "learning_rate": 9.926593259926593e-06, + "loss": 0.5489, + "step": 5470 }, { "epoch": 48.02, - "learning_rate": 9.813232035454257e-06, - "loss": 0.4297, - "step": 5780 + "learning_rate": 9.843176509843177e-06, + "loss": 0.523, + "step": 5480 }, { "epoch": 48.02, - "eval_accuracy": 0.8248847926267281, - "eval_loss": 0.6810868978500366, - "eval_runtime": 261.851, - "eval_samples_per_second": 0.829, - "eval_steps_per_second": 0.21, - "step": 5782 + "eval_accuracy": 0.6267281105990783, + "eval_loss": 1.2123322486877441, + "eval_runtime": 258.3979, + "eval_samples_per_second": 0.84, + "eval_steps_per_second": 0.213, + "step": 5488 }, { "epoch": 49.0, - "learning_rate": 9.734093067426402e-06, - "loss": 0.3992, - "step": 5790 + "learning_rate": 9.75975975975976e-06, + "loss": 0.2441, + "step": 5490 }, { "epoch": 49.0, - "learning_rate": 9.654954099398545e-06, - "loss": 1.0214, - "step": 5800 + "learning_rate": 9.676343009676344e-06, + "loss": 0.6196, + "step": 5500 }, { "epoch": 49.0, - "learning_rate": 9.575815131370687e-06, - "loss": 0.4853, - "step": 5810 + "learning_rate": 9.592926259592927e-06, + "loss": 0.3021, + "step": 5510 }, { - "epoch": 49.01, - "learning_rate": 9.49667616334283e-06, - "loss": 0.2907, - "step": 5820 + "epoch": 49.0, + "learning_rate": 9.509509509509509e-06, + "loss": 0.73, + "step": 5520 }, { "epoch": 49.01, - "learning_rate": 9.417537195314973e-06, - "loss": 0.4338, - "step": 5830 + "learning_rate": 9.426092759426094e-06, + "loss": 0.5437, + "step": 5530 }, { "epoch": 49.01, - "learning_rate": 9.338398227287117e-06, - "loss": 0.3648, - "step": 5840 + "learning_rate": 9.342676009342677e-06, + "loss": 0.3555, + "step": 5540 }, { "epoch": 49.01, "learning_rate": 9.259259259259259e-06, - "loss": 0.8034, - "step": 5850 + "loss": 0.5324, + "step": 5550 }, { "epoch": 49.01, - "learning_rate": 9.180120291231403e-06, - "loss": 0.7394, - "step": 5860 + "learning_rate": 9.175842509175843e-06, + "loss": 0.3838, + "step": 5560 }, { "epoch": 49.01, - "learning_rate": 9.100981323203545e-06, - "loss": 0.494, - "step": 5870 + "learning_rate": 9.092425759092426e-06, + "loss": 0.5131, + "step": 5570 }, { "epoch": 49.01, - "learning_rate": 9.02184235517569e-06, - "loss": 0.9404, - "step": 5880 + "learning_rate": 9.00900900900901e-06, + "loss": 0.5087, + "step": 5580 }, { "epoch": 49.02, - "learning_rate": 8.942703387147831e-06, - "loss": 0.4178, - "step": 5890 + "learning_rate": 8.925592258925593e-06, + "loss": 0.5122, + "step": 5590 }, { "epoch": 49.02, - "learning_rate": 8.863564419119975e-06, - "loss": 0.2783, - "step": 5900 + "learning_rate": 8.842175508842176e-06, + "loss": 0.8344, + "step": 5600 }, { "epoch": 49.02, - "eval_accuracy": 0.8525345622119815, - "eval_loss": 0.586846113204956, - "eval_runtime": 260.0845, - "eval_samples_per_second": 0.834, - "eval_steps_per_second": 0.211, - "step": 5900 + "eval_accuracy": 0.663594470046083, + "eval_loss": 1.1888865232467651, + "eval_runtime": 295.3851, + "eval_samples_per_second": 0.735, + "eval_steps_per_second": 0.186, + "step": 5600 }, { "epoch": 50.0, - "learning_rate": 8.784425451092118e-06, - "loss": 0.4036, - "step": 5910 + "learning_rate": 8.758758758758758e-06, + "loss": 0.6569, + "step": 5610 }, { "epoch": 50.0, - "learning_rate": 8.705286483064262e-06, - "loss": 0.6644, - "step": 5920 + "learning_rate": 8.675342008675343e-06, + "loss": 0.4491, + "step": 5620 }, { "epoch": 50.0, - "learning_rate": 8.626147515036405e-06, - "loss": 0.2605, - "step": 5930 - }, - { - "epoch": 50.01, - "learning_rate": 8.547008547008548e-06, - "loss": 0.6677, - "step": 5940 + "learning_rate": 8.591925258591927e-06, + "loss": 0.6111, + "step": 5630 }, { "epoch": 50.01, - "learning_rate": 8.46786957898069e-06, - "loss": 0.4203, - "step": 5950 + "learning_rate": 8.508508508508508e-06, + "loss": 0.2911, + "step": 5640 }, { "epoch": 50.01, - "learning_rate": 8.388730610952832e-06, - "loss": 0.286, - "step": 5960 + "learning_rate": 8.425091758425092e-06, + "loss": 0.6463, + "step": 5650 }, { "epoch": 50.01, - "learning_rate": 8.309591642924978e-06, - "loss": 0.5586, - "step": 5970 + "learning_rate": 8.341675008341675e-06, + "loss": 0.3108, + "step": 5660 }, { "epoch": 50.01, - "learning_rate": 8.23045267489712e-06, - "loss": 0.5642, - "step": 5980 + "learning_rate": 8.258258258258259e-06, + "loss": 0.4594, + "step": 5670 }, { "epoch": 50.01, - "learning_rate": 8.151313706869262e-06, - "loss": 0.583, - "step": 5990 + "learning_rate": 8.174841508174842e-06, + "loss": 0.4389, + "step": 5680 }, { "epoch": 50.01, - "learning_rate": 8.072174738841406e-06, - "loss": 0.2579, - "step": 6000 + "learning_rate": 8.091424758091424e-06, + "loss": 0.4616, + "step": 5690 }, { "epoch": 50.02, - "learning_rate": 7.993035770813548e-06, - "loss": 0.4946, - "step": 6010 + "learning_rate": 8.008008008008007e-06, + "loss": 0.2669, + "step": 5700 }, { "epoch": 50.02, - "eval_accuracy": 0.7926267281105991, - "eval_loss": 0.673189103603363, - "eval_runtime": 263.3409, - "eval_samples_per_second": 0.824, - "eval_steps_per_second": 0.209, - "step": 6018 + "learning_rate": 7.924591257924593e-06, + "loss": 0.6648, + "step": 5710 }, { - "epoch": 51.0, - "learning_rate": 7.913896802785692e-06, - "loss": 0.2915, - "step": 6020 + "epoch": 50.02, + "eval_accuracy": 0.6405529953917051, + "eval_loss": 1.2328165769577026, + "eval_runtime": 294.1867, + "eval_samples_per_second": 0.738, + "eval_steps_per_second": 0.187, + "step": 5712 }, { "epoch": 51.0, - "learning_rate": 7.834757834757835e-06, - "loss": 0.4099, - "step": 6030 + "learning_rate": 7.841174507841174e-06, + "loss": 0.5881, + "step": 5720 }, { "epoch": 51.0, - "learning_rate": 7.755618866729978e-06, - "loss": 0.3578, - "step": 6040 + "learning_rate": 7.757757757757758e-06, + "loss": 0.658, + "step": 5730 }, { "epoch": 51.0, - "learning_rate": 7.67647989870212e-06, - "loss": 0.6813, - "step": 6050 + "learning_rate": 7.674341007674341e-06, + "loss": 0.473, + "step": 5740 }, { "epoch": 51.01, - "learning_rate": 7.597340930674264e-06, - "loss": 0.7868, - "step": 6060 + "learning_rate": 7.590924257590924e-06, + "loss": 0.561, + "step": 5750 }, { "epoch": 51.01, - "learning_rate": 7.5182019626464085e-06, - "loss": 0.3421, - "step": 6070 + "learning_rate": 7.507507507507508e-06, + "loss": 0.3124, + "step": 5760 }, { "epoch": 51.01, - "learning_rate": 7.439062994618551e-06, - "loss": 0.3972, - "step": 6080 + "learning_rate": 7.424090757424092e-06, + "loss": 0.6171, + "step": 5770 }, { "epoch": 51.01, - "learning_rate": 7.359924026590694e-06, - "loss": 0.4223, - "step": 6090 + "learning_rate": 7.340674007340674e-06, + "loss": 0.2376, + "step": 5780 }, { "epoch": 51.01, - "learning_rate": 7.280785058562836e-06, - "loss": 0.4222, - "step": 6100 + "learning_rate": 7.257257257257258e-06, + "loss": 0.5859, + "step": 5790 }, { "epoch": 51.01, - "learning_rate": 7.20164609053498e-06, - "loss": 0.5646, - "step": 6110 + "learning_rate": 7.173840507173841e-06, + "loss": 0.3655, + "step": 5800 }, { "epoch": 51.01, - "learning_rate": 7.122507122507123e-06, - "loss": 0.5948, - "step": 6120 + "learning_rate": 7.090423757090424e-06, + "loss": 0.5225, + "step": 5810 }, { "epoch": 51.02, - "learning_rate": 7.043368154479265e-06, - "loss": 0.3058, - "step": 6130 + "learning_rate": 7.007007007007008e-06, + "loss": 0.6929, + "step": 5820 }, { "epoch": 51.02, - "eval_accuracy": 0.8341013824884793, - "eval_loss": 0.551148533821106, - "eval_runtime": 267.39, - "eval_samples_per_second": 0.812, - "eval_steps_per_second": 0.206, - "step": 6136 - }, - { - "epoch": 52.0, - "learning_rate": 6.964229186451408e-06, - "loss": 0.4489, - "step": 6140 + "eval_accuracy": 0.6129032258064516, + "eval_loss": 1.3269143104553223, + "eval_runtime": 258.5914, + "eval_samples_per_second": 0.839, + "eval_steps_per_second": 0.213, + "step": 5824 }, { "epoch": 52.0, - "learning_rate": 6.885090218423552e-06, - "loss": 0.6678, - "step": 6150 + "learning_rate": 6.92359025692359e-06, + "loss": 0.2531, + "step": 5830 }, { "epoch": 52.0, - "learning_rate": 6.805951250395695e-06, - "loss": 0.4425, - "step": 6160 + "learning_rate": 6.840173506840174e-06, + "loss": 0.6214, + "step": 5840 }, { "epoch": 52.0, - "learning_rate": 6.726812282367838e-06, - "loss": 0.4, - "step": 6170 + "learning_rate": 6.7567567567567575e-06, + "loss": 0.6634, + "step": 5850 }, { "epoch": 52.01, - "learning_rate": 6.6476733143399815e-06, - "loss": 0.6218, - "step": 6180 + "learning_rate": 6.67334000667334e-06, + "loss": 0.5057, + "step": 5860 }, { "epoch": 52.01, - "learning_rate": 6.568534346312125e-06, - "loss": 0.4908, - "step": 6190 + "learning_rate": 6.589923256589924e-06, + "loss": 0.4402, + "step": 5870 }, { "epoch": 52.01, - "learning_rate": 6.489395378284268e-06, - "loss": 0.4602, - "step": 6200 + "learning_rate": 6.506506506506507e-06, + "loss": 0.9344, + "step": 5880 }, { "epoch": 52.01, - "learning_rate": 6.41025641025641e-06, - "loss": 0.3036, - "step": 6210 + "learning_rate": 6.42308975642309e-06, + "loss": 0.1245, + "step": 5890 }, { "epoch": 52.01, - "learning_rate": 6.331117442228554e-06, - "loss": 0.229, - "step": 6220 + "learning_rate": 6.339673006339673e-06, + "loss": 0.568, + "step": 5900 }, { "epoch": 52.01, - "learning_rate": 6.251978474200697e-06, - "loss": 0.5506, - "step": 6230 + "learning_rate": 6.256256256256257e-06, + "loss": 0.4524, + "step": 5910 }, { "epoch": 52.01, "learning_rate": 6.172839506172839e-06, - "loss": 0.201, - "step": 6240 + "loss": 0.4983, + "step": 5920 }, { "epoch": 52.02, - "learning_rate": 6.093700538144983e-06, - "loss": 0.1286, - "step": 6250 + "learning_rate": 6.0894227560894234e-06, + "loss": 0.4253, + "step": 5930 }, { "epoch": 52.02, - "eval_accuracy": 0.8294930875576036, - "eval_loss": 0.5877251029014587, - "eval_runtime": 267.204, - "eval_samples_per_second": 0.812, - "eval_steps_per_second": 0.206, - "step": 6254 + "eval_accuracy": 0.6820276497695853, + "eval_loss": 1.1884889602661133, + "eval_runtime": 278.2143, + "eval_samples_per_second": 0.78, + "eval_steps_per_second": 0.198, + "step": 5936 }, { "epoch": 53.0, - "learning_rate": 6.014561570117126e-06, - "loss": 0.5248, - "step": 6260 + "learning_rate": 6.006006006006006e-06, + "loss": 0.5096, + "step": 5940 }, { "epoch": 53.0, - "learning_rate": 5.935422602089269e-06, - "loss": 0.5, - "step": 6270 + "learning_rate": 5.9225892559225895e-06, + "loss": 0.6105, + "step": 5950 }, { "epoch": 53.0, - "learning_rate": 5.856283634061412e-06, - "loss": 0.484, - "step": 6280 + "learning_rate": 5.839172505839173e-06, + "loss": 1.0454, + "step": 5960 }, { "epoch": 53.01, - "learning_rate": 5.777144666033555e-06, - "loss": 0.4989, - "step": 6290 + "learning_rate": 5.755755755755756e-06, + "loss": 0.4144, + "step": 5970 }, { "epoch": 53.01, - "learning_rate": 5.6980056980056985e-06, - "loss": 0.71, - "step": 6300 + "learning_rate": 5.672339005672339e-06, + "loss": 0.8673, + "step": 5980 }, { "epoch": 53.01, - "learning_rate": 5.618866729977841e-06, - "loss": 0.8, - "step": 6310 + "learning_rate": 5.5889222555889224e-06, + "loss": 0.4563, + "step": 5990 }, { "epoch": 53.01, - "learning_rate": 5.539727761949985e-06, - "loss": 0.5235, - "step": 6320 + "learning_rate": 5.505505505505506e-06, + "loss": 0.5437, + "step": 6000 }, { "epoch": 53.01, - "learning_rate": 5.460588793922128e-06, - "loss": 0.9647, - "step": 6330 + "learning_rate": 5.422088755422089e-06, + "loss": 0.4049, + "step": 6010 }, { "epoch": 53.01, - "learning_rate": 5.381449825894271e-06, - "loss": 0.3382, - "step": 6340 + "learning_rate": 5.338672005338672e-06, + "loss": 0.5225, + "step": 6020 }, { "epoch": 53.01, - "learning_rate": 5.302310857866414e-06, - "loss": 0.4008, - "step": 6350 - }, - { - "epoch": 53.02, - "learning_rate": 5.223171889838557e-06, - "loss": 0.5169, - "step": 6360 + "learning_rate": 5.255255255255255e-06, + "loss": 0.4383, + "step": 6030 }, { "epoch": 53.02, - "learning_rate": 5.1440329218107e-06, - "loss": 0.2013, - "step": 6370 + "learning_rate": 5.171838505171839e-06, + "loss": 0.7003, + "step": 6040 }, { "epoch": 53.02, - "eval_accuracy": 0.815668202764977, - "eval_loss": 0.6507552266120911, - "eval_runtime": 274.5228, - "eval_samples_per_second": 0.79, - "eval_steps_per_second": 0.2, - "step": 6372 + "eval_accuracy": 0.7004608294930875, + "eval_loss": 1.1521508693695068, + "eval_runtime": 293.3646, + "eval_samples_per_second": 0.74, + "eval_steps_per_second": 0.187, + "step": 6048 }, { "epoch": 54.0, - "learning_rate": 5.064893953782843e-06, - "loss": 1.0097, - "step": 6380 + "learning_rate": 5.0884217550884214e-06, + "loss": 0.6495, + "step": 6050 }, { "epoch": 54.0, - "learning_rate": 4.985754985754986e-06, - "loss": 0.2331, - "step": 6390 + "learning_rate": 5.005005005005006e-06, + "loss": 0.5755, + "step": 6060 }, { "epoch": 54.0, - "learning_rate": 4.906616017727128e-06, - "loss": 0.388, - "step": 6400 + "learning_rate": 4.921588254921588e-06, + "loss": 0.5131, + "step": 6070 }, { - "epoch": 54.01, - "learning_rate": 4.827477049699272e-06, - "loss": 0.4358, - "step": 6410 + "epoch": 54.0, + "learning_rate": 4.838171504838172e-06, + "loss": 0.5332, + "step": 6080 }, { "epoch": 54.01, - "learning_rate": 4.748338081671415e-06, - "loss": 0.4098, - "step": 6420 + "learning_rate": 4.754754754754754e-06, + "loss": 0.2762, + "step": 6090 }, { "epoch": 54.01, - "learning_rate": 4.6691991136435585e-06, - "loss": 0.3491, - "step": 6430 + "learning_rate": 4.671338004671339e-06, + "loss": 0.383, + "step": 6100 }, { "epoch": 54.01, - "learning_rate": 4.5900601456157015e-06, - "loss": 0.2839, - "step": 6440 + "learning_rate": 4.587921254587921e-06, + "loss": 0.6731, + "step": 6110 }, { "epoch": 54.01, - "learning_rate": 4.510921177587845e-06, - "loss": 0.1628, - "step": 6450 + "learning_rate": 4.504504504504505e-06, + "loss": 0.4703, + "step": 6120 }, { "epoch": 54.01, - "learning_rate": 4.431782209559988e-06, - "loss": 1.0689, - "step": 6460 + "learning_rate": 4.421087754421088e-06, + "loss": 0.3767, + "step": 6130 }, { "epoch": 54.01, - "learning_rate": 4.352643241532131e-06, - "loss": 0.6193, - "step": 6470 + "learning_rate": 4.337671004337672e-06, + "loss": 0.5669, + "step": 6140 }, { "epoch": 54.02, - "learning_rate": 4.273504273504274e-06, - "loss": 0.7058, - "step": 6480 + "learning_rate": 4.254254254254254e-06, + "loss": 0.4277, + "step": 6150 }, { "epoch": 54.02, - "learning_rate": 4.194365305476416e-06, - "loss": 0.2027, - "step": 6490 + "learning_rate": 4.170837504170838e-06, + "loss": 0.4105, + "step": 6160 }, { "epoch": 54.02, - "eval_accuracy": 0.815668202764977, - "eval_loss": 0.6629670858383179, - "eval_runtime": 274.2142, - "eval_samples_per_second": 0.791, - "eval_steps_per_second": 0.201, - "step": 6490 + "eval_accuracy": 0.7373271889400922, + "eval_loss": 1.0037211179733276, + "eval_runtime": 262.5907, + "eval_samples_per_second": 0.826, + "eval_steps_per_second": 0.209, + "step": 6160 }, { "epoch": 55.0, - "learning_rate": 4.11522633744856e-06, - "loss": 0.4227, - "step": 6500 + "learning_rate": 4.087420754087421e-06, + "loss": 0.2836, + "step": 6170 }, { "epoch": 55.0, - "learning_rate": 4.036087369420703e-06, - "loss": 0.8382, - "step": 6510 + "learning_rate": 4.004004004004004e-06, + "loss": 0.6498, + "step": 6180 }, { "epoch": 55.0, - "learning_rate": 3.956948401392846e-06, - "loss": 0.5188, - "step": 6520 - }, - { - "epoch": 55.01, - "learning_rate": 3.877809433364989e-06, - "loss": 0.4906, - "step": 6530 + "learning_rate": 3.920587253920587e-06, + "loss": 0.5334, + "step": 6190 }, { "epoch": 55.01, - "learning_rate": 3.798670465337132e-06, - "loss": 0.4729, - "step": 6540 + "learning_rate": 3.837170503837171e-06, + "loss": 0.7971, + "step": 6200 }, { "epoch": 55.01, - "learning_rate": 3.7195314973092754e-06, - "loss": 0.6123, - "step": 6550 + "learning_rate": 3.753753753753754e-06, + "loss": 0.3278, + "step": 6210 }, { "epoch": 55.01, - "learning_rate": 3.640392529281418e-06, - "loss": 0.5144, - "step": 6560 + "learning_rate": 3.670337003670337e-06, + "loss": 0.3237, + "step": 6220 }, { "epoch": 55.01, - "learning_rate": 3.5612535612535615e-06, - "loss": 0.863, - "step": 6570 + "learning_rate": 3.5869202535869206e-06, + "loss": 0.7723, + "step": 6230 }, { "epoch": 55.01, - "learning_rate": 3.482114593225704e-06, - "loss": 0.4071, - "step": 6580 + "learning_rate": 3.503503503503504e-06, + "loss": 0.6996, + "step": 6240 }, { "epoch": 55.01, - "learning_rate": 3.4029756251978477e-06, - "loss": 0.5755, - "step": 6590 + "learning_rate": 3.420086753420087e-06, + "loss": 0.544, + "step": 6250 }, { "epoch": 55.02, - "learning_rate": 3.3238366571699908e-06, - "loss": 0.6267, - "step": 6600 + "learning_rate": 3.33667000333667e-06, + "loss": 0.6693, + "step": 6260 }, { "epoch": 55.02, - "eval_accuracy": 0.8064516129032258, - "eval_loss": 0.7372620701789856, - "eval_runtime": 267.3033, - "eval_samples_per_second": 0.812, - "eval_steps_per_second": 0.206, - "step": 6608 + "learning_rate": 3.2532532532532535e-06, + "loss": 0.5206, + "step": 6270 }, { - "epoch": 56.0, - "learning_rate": 3.244697689142134e-06, - "loss": 0.3038, - "step": 6610 + "epoch": 55.02, + "eval_accuracy": 0.7188940092165899, + "eval_loss": 1.0913441181182861, + "eval_runtime": 299.8505, + "eval_samples_per_second": 0.724, + "eval_steps_per_second": 0.183, + "step": 6272 }, { "epoch": 56.0, - "learning_rate": 3.165558721114277e-06, - "loss": 0.6753, - "step": 6620 + "learning_rate": 3.1698365031698365e-06, + "loss": 0.2842, + "step": 6280 }, { "epoch": 56.0, "learning_rate": 3.0864197530864196e-06, - "loss": 0.4761, - "step": 6630 + "loss": 0.4048, + "step": 6290 }, { "epoch": 56.0, - "learning_rate": 3.007280785058563e-06, - "loss": 0.4697, - "step": 6640 + "learning_rate": 3.003003003003003e-06, + "loss": 0.6278, + "step": 6300 }, { "epoch": 56.01, - "learning_rate": 2.928141817030706e-06, - "loss": 0.2215, - "step": 6650 + "learning_rate": 2.9195862529195865e-06, + "loss": 0.4073, + "step": 6310 }, { "epoch": 56.01, - "learning_rate": 2.8490028490028492e-06, - "loss": 0.4558, - "step": 6660 + "learning_rate": 2.8361695028361695e-06, + "loss": 0.7256, + "step": 6320 }, { "epoch": 56.01, - "learning_rate": 2.7698638809749923e-06, - "loss": 0.7401, - "step": 6670 + "learning_rate": 2.752752752752753e-06, + "loss": 0.5157, + "step": 6330 }, { "epoch": 56.01, - "learning_rate": 2.6907249129471354e-06, - "loss": 0.5682, - "step": 6680 + "learning_rate": 2.669336002669336e-06, + "loss": 0.205, + "step": 6340 }, { "epoch": 56.01, - "learning_rate": 2.6115859449192785e-06, - "loss": 0.5346, - "step": 6690 + "learning_rate": 2.5859192525859194e-06, + "loss": 0.513, + "step": 6350 }, { "epoch": 56.01, - "learning_rate": 2.5324469768914215e-06, - "loss": 0.7019, - "step": 6700 + "learning_rate": 2.502502502502503e-06, + "loss": 0.3176, + "step": 6360 }, { "epoch": 56.01, - "learning_rate": 2.453308008863564e-06, - "loss": 0.4813, - "step": 6710 + "learning_rate": 2.419085752419086e-06, + "loss": 0.4901, + "step": 6370 }, { "epoch": 56.02, - "learning_rate": 2.3741690408357077e-06, - "loss": 0.4561, - "step": 6720 + "learning_rate": 2.3356690023356693e-06, + "loss": 0.7129, + "step": 6380 }, { "epoch": 56.02, - "eval_accuracy": 0.8018433179723502, - "eval_loss": 0.7382919192314148, - "eval_runtime": 275.3448, - "eval_samples_per_second": 0.788, - "eval_steps_per_second": 0.2, - "step": 6726 - }, - { - "epoch": 57.0, - "learning_rate": 2.2950300728078508e-06, - "loss": 0.4686, - "step": 6730 + "eval_accuracy": 0.6866359447004609, + "eval_loss": 1.108277678489685, + "eval_runtime": 292.9571, + "eval_samples_per_second": 0.741, + "eval_steps_per_second": 0.188, + "step": 6384 }, { "epoch": 57.0, - "learning_rate": 2.215891104779994e-06, - "loss": 0.3041, - "step": 6740 + "learning_rate": 2.2522522522522524e-06, + "loss": 0.2624, + "step": 6390 }, { "epoch": 57.0, - "learning_rate": 2.136752136752137e-06, - "loss": 0.241, - "step": 6750 + "learning_rate": 2.168835502168836e-06, + "loss": 0.0914, + "step": 6400 }, { "epoch": 57.0, - "learning_rate": 2.05761316872428e-06, - "loss": 0.4126, - "step": 6760 + "learning_rate": 2.085418752085419e-06, + "loss": 0.684, + "step": 6410 }, { "epoch": 57.01, - "learning_rate": 1.978474200696423e-06, - "loss": 0.286, - "step": 6770 + "learning_rate": 2.002002002002002e-06, + "loss": 0.3441, + "step": 6420 }, { "epoch": 57.01, - "learning_rate": 1.899335232668566e-06, - "loss": 0.582, - "step": 6780 + "learning_rate": 1.9185852519185853e-06, + "loss": 0.4939, + "step": 6430 }, { "epoch": 57.01, - "learning_rate": 1.820196264640709e-06, - "loss": 0.3814, - "step": 6790 + "learning_rate": 1.8351685018351686e-06, + "loss": 0.1774, + "step": 6440 }, { "epoch": 57.01, - "learning_rate": 1.741057296612852e-06, - "loss": 0.6788, - "step": 6800 + "learning_rate": 1.751751751751752e-06, + "loss": 0.5289, + "step": 6450 }, { "epoch": 57.01, - "learning_rate": 1.6619183285849954e-06, - "loss": 0.44, - "step": 6810 + "learning_rate": 1.668335001668335e-06, + "loss": 0.9437, + "step": 6460 }, { "epoch": 57.01, - "learning_rate": 1.5827793605571385e-06, - "loss": 0.4996, - "step": 6820 + "learning_rate": 1.5849182515849183e-06, + "loss": 0.7129, + "step": 6470 }, { "epoch": 57.01, - "learning_rate": 1.5036403925292815e-06, - "loss": 0.2839, - "step": 6830 + "learning_rate": 1.5015015015015015e-06, + "loss": 0.1014, + "step": 6480 }, { "epoch": 57.02, - "learning_rate": 1.4245014245014246e-06, - "loss": 0.7002, - "step": 6840 + "learning_rate": 1.4180847514180847e-06, + "loss": 0.4772, + "step": 6490 }, { "epoch": 57.02, - "eval_accuracy": 0.8110599078341014, - "eval_loss": 0.7072679400444031, - "eval_runtime": 266.757, - "eval_samples_per_second": 0.813, - "eval_steps_per_second": 0.206, - "step": 6844 + "eval_accuracy": 0.7142857142857143, + "eval_loss": 1.1276459693908691, + "eval_runtime": 263.52, + "eval_samples_per_second": 0.823, + "eval_steps_per_second": 0.209, + "step": 6496 }, { "epoch": 58.0, - "learning_rate": 1.3453624564735677e-06, - "loss": 0.215, - "step": 6850 + "learning_rate": 1.334668001334668e-06, + "loss": 0.5678, + "step": 6500 }, { "epoch": 58.0, - "learning_rate": 1.2662234884457108e-06, - "loss": 0.6569, - "step": 6860 + "learning_rate": 1.2512512512512514e-06, + "loss": 0.2869, + "step": 6510 }, { "epoch": 58.0, - "learning_rate": 1.1870845204178538e-06, - "loss": 0.6116, - "step": 6870 + "learning_rate": 1.1678345011678347e-06, + "loss": 0.9173, + "step": 6520 }, { "epoch": 58.01, - "learning_rate": 1.107945552389997e-06, - "loss": 0.3416, - "step": 6880 + "learning_rate": 1.084417751084418e-06, + "loss": 0.5002, + "step": 6530 }, { "epoch": 58.01, - "learning_rate": 1.02880658436214e-06, - "loss": 0.571, - "step": 6890 + "learning_rate": 1.001001001001001e-06, + "loss": 0.4222, + "step": 6540 }, { "epoch": 58.01, - "learning_rate": 9.49667616334283e-07, - "loss": 0.6238, - "step": 6900 + "learning_rate": 9.175842509175843e-07, + "loss": 0.2793, + "step": 6550 }, { "epoch": 58.01, - "learning_rate": 8.70528648306426e-07, - "loss": 0.346, - "step": 6910 + "learning_rate": 8.341675008341675e-07, + "loss": 0.2781, + "step": 6560 }, { "epoch": 58.01, - "learning_rate": 7.913896802785692e-07, - "loss": 0.4527, - "step": 6920 + "learning_rate": 7.507507507507508e-07, + "loss": 0.4779, + "step": 6570 }, { "epoch": 58.01, - "learning_rate": 7.122507122507123e-07, - "loss": 0.5342, - "step": 6930 + "learning_rate": 6.67334000667334e-07, + "loss": 0.4709, + "step": 6580 }, { "epoch": 58.01, - "learning_rate": 6.331117442228554e-07, - "loss": 0.523, - "step": 6940 - }, - { - "epoch": 58.02, - "learning_rate": 5.539727761949985e-07, - "loss": 0.2861, - "step": 6950 + "learning_rate": 5.839172505839173e-07, + "loss": 0.5879, + "step": 6590 }, { "epoch": 58.02, - "learning_rate": 4.748338081671415e-07, - "loss": 0.1823, - "step": 6960 + "learning_rate": 5.005005005005005e-07, + "loss": 0.4822, + "step": 6600 }, { "epoch": 58.02, - "eval_accuracy": 0.8202764976958525, - "eval_loss": 0.6870871186256409, - "eval_runtime": 266.4451, - "eval_samples_per_second": 0.814, - "eval_steps_per_second": 0.206, - "step": 6962 + "eval_accuracy": 0.7235023041474654, + "eval_loss": 1.0919580459594727, + "eval_runtime": 304.8665, + "eval_samples_per_second": 0.712, + "eval_steps_per_second": 0.18, + "step": 6608 }, { "epoch": 59.0, - "learning_rate": 3.956948401392846e-07, - "loss": 0.5552, - "step": 6970 + "learning_rate": 4.1708375041708376e-07, + "loss": 0.601, + "step": 6610 }, { "epoch": 59.0, - "learning_rate": 3.165558721114277e-07, - "loss": 0.4444, - "step": 6980 + "learning_rate": 3.33667000333667e-07, + "loss": 0.5131, + "step": 6620 }, { "epoch": 59.0, - "learning_rate": 2.3741690408357074e-07, - "loss": 0.4752, - "step": 6990 + "learning_rate": 2.5025025025025023e-07, + "loss": 0.175, + "step": 6630 }, { - "epoch": 59.01, - "learning_rate": 1.5827793605571385e-07, - "loss": 0.176, - "step": 7000 + "epoch": 59.0, + "learning_rate": 1.668335001668335e-07, + "loss": 0.4028, + "step": 6640 }, { "epoch": 59.01, - "learning_rate": 7.913896802785692e-08, - "loss": 0.8408, - "step": 7010 + "learning_rate": 8.341675008341675e-08, + "loss": 0.5526, + "step": 6650 }, { "epoch": 59.01, "learning_rate": 0.0, - "loss": 0.2439, - "step": 7020 - }, - { - "epoch": 59.01, - "eval_accuracy": 0.8202764976958525, - "eval_loss": 0.690118670463562, - "eval_runtime": 307.1421, - "eval_samples_per_second": 0.707, - "eval_steps_per_second": 0.179, - "step": 7020 - }, - { - "epoch": 59.01, - "step": 7020, - "total_flos": 3.476987046291161e+19, - "train_loss": 0.6254725841715125, - "train_runtime": 65940.1985, - "train_samples_per_second": 0.426, - "train_steps_per_second": 0.106 + "loss": 0.6307, + "step": 6660 }, { "epoch": 59.01, - "eval_accuracy": 0.6990740740740741, - "eval_loss": 1.074745774269104, - "eval_runtime": 265.3059, - "eval_samples_per_second": 0.814, - "eval_steps_per_second": 0.204, - "step": 7020 + "eval_accuracy": 0.7188940092165899, + "eval_loss": 1.098652958869934, + "eval_runtime": 289.552, + "eval_samples_per_second": 0.749, + "eval_steps_per_second": 0.19, + "step": 6660 }, { "epoch": 59.01, - "eval_accuracy": 0.6990740740740741, - "eval_loss": 1.074745774269104, - "eval_runtime": 283.9645, - "eval_samples_per_second": 0.761, - "eval_steps_per_second": 0.19, - "step": 7020 + "step": 6660, + "total_flos": 3.3122529003141366e+19, + "train_loss": 0.7877646436920395, + "train_runtime": 57566.1475, + "train_samples_per_second": 0.463, + "train_steps_per_second": 0.116 }, { "epoch": 59.01, - "eval_accuracy": 0.6990740740740741, - "eval_loss": 1.0747456550598145, - "eval_runtime": 273.0941, - "eval_samples_per_second": 0.791, - "eval_steps_per_second": 0.198, - "step": 7020 + "eval_accuracy": 0.6296296296296297, + "eval_loss": 0.8929917812347412, + "eval_runtime": 260.8949, + "eval_samples_per_second": 0.828, + "eval_steps_per_second": 0.207, + "step": 6660 }, { "epoch": 59.01, - "eval_accuracy": 0.6990740740740741, - "eval_loss": 1.074745774269104, - "eval_runtime": 320.3996, - "eval_samples_per_second": 0.674, - "eval_steps_per_second": 0.169, - "step": 7020 + "eval_accuracy": 0.6296296296296297, + "eval_loss": 0.8929917812347412, + "eval_runtime": 274.034, + "eval_samples_per_second": 0.788, + "eval_steps_per_second": 0.197, + "step": 6660 } ], "logging_steps": 10, - "max_steps": 7020, + "max_steps": 6660, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, - "total_flos": 3.476987046291161e+19, + "total_flos": 3.3122529003141366e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null