diff --git "a/check-22000/trainer_state.json" "b/check-22000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/check-22000/trainer_state.json" @@ -0,0 +1,13612 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9986835534976621, + "global_step": 22000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.999092105860457e-05, + "loss": 2.4805, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.9981842117209137e-05, + "loss": 2.4355, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.99727631758137e-05, + "loss": 1.317, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.996368423441827e-05, + "loss": 1.5764, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.9954605293022836e-05, + "loss": 2.4654, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.9945526351627404e-05, + "loss": 3.0094, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.993644741023197e-05, + "loss": 1.9016, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.9927368468836535e-05, + "loss": 2.7273, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.9918289527441103e-05, + "loss": 2.0789, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1.9909210586045667e-05, + "loss": 1.8132, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 1.9900131644650234e-05, + "loss": 2.7362, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.9891052703254802e-05, + "loss": 1.1584, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.9881973761859366e-05, + "loss": 1.8474, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 1.9872894820463934e-05, + "loss": 1.2248, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 1.98638158790685e-05, + "loss": 1.8344, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 1.985473693767307e-05, + "loss": 1.5881, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 1.9845657996277636e-05, + "loss": 1.9684, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 1.98365790548822e-05, + "loss": 1.9283, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 1.9827500113486768e-05, + "loss": 1.776, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 1.9818421172091336e-05, + "loss": 1.5498, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 1.9809342230695903e-05, + "loss": 2.0404, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 1.980026328930047e-05, + "loss": 2.0609, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 1.9791184347905038e-05, + "loss": 1.2291, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 1.9782105406509602e-05, + "loss": 0.6943, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 1.977302646511417e-05, + "loss": 1.5242, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 1.9763947523718737e-05, + "loss": 1.5855, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 1.9754868582323305e-05, + "loss": 1.6128, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 1.974578964092787e-05, + "loss": 1.4307, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 1.9736710699532437e-05, + "loss": 1.402, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 1.9727631758137e-05, + "loss": 1.4633, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 1.971855281674157e-05, + "loss": 1.0023, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 1.9709473875346136e-05, + "loss": 1.2508, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 1.9700394933950703e-05, + "loss": 1.4191, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 1.9691315992555268e-05, + "loss": 1.0203, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 1.9682237051159835e-05, + "loss": 1.2482, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 1.9673158109764403e-05, + "loss": 1.108, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 1.966407916836897e-05, + "loss": 0.8581, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 1.9655000226973538e-05, + "loss": 1.307, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 1.9645921285578102e-05, + "loss": 1.0133, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 1.963684234418267e-05, + "loss": 0.9578, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 1.9627763402787237e-05, + "loss": 1.2262, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 1.9618684461391805e-05, + "loss": 1.075, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 1.9609605519996372e-05, + "loss": 1.1379, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 1.9600526578600936e-05, + "loss": 1.1068, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 1.9591447637205504e-05, + "loss": 0.9898, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 1.958236869581007e-05, + "loss": 0.7063, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 1.9573289754414636e-05, + "loss": 1.0111, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 1.9564210813019203e-05, + "loss": 0.9605, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 1.955513187162377e-05, + "loss": 0.8791, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 1.9546052930228335e-05, + "loss": 0.8611, + "step": 500 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.5282472235634958, + "eval_loss": 0.9476923942565918, + "eval_runtime": 73.7994, + "eval_samples_per_second": 56.125, + "eval_steps_per_second": 14.038, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9536973988832902e-05, + "loss": 0.7687, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.952789504743747e-05, + "loss": 1.0031, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.9518816106042037e-05, + "loss": 1.0047, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 1.95097371646466e-05, + "loss": 1.198, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 1.950065822325117e-05, + "loss": 1.1266, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 1.9491579281855737e-05, + "loss": 0.9148, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 1.9482500340460304e-05, + "loss": 0.9363, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 1.9473421399064872e-05, + "loss": 1.1197, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 1.946434245766944e-05, + "loss": 0.8326, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 1.9455263516274003e-05, + "loss": 1.0715, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 1.944618457487857e-05, + "loss": 0.8307, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 1.943710563348314e-05, + "loss": 0.6955, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 1.9428026692087706e-05, + "loss": 0.9908, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 1.9418947750692274e-05, + "loss": 0.7029, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 1.9409868809296838e-05, + "loss": 0.8758, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 1.9400789867901405e-05, + "loss": 0.858, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.939171092650597e-05, + "loss": 0.6072, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 1.9382631985110537e-05, + "loss": 0.7605, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 1.9373553043715105e-05, + "loss": 0.6867, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.936447410231967e-05, + "loss": 0.8187, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 1.9355395160924236e-05, + "loss": 0.7312, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 1.9346316219528804e-05, + "loss": 0.7791, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.933723727813337e-05, + "loss": 0.8156, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 1.932815833673794e-05, + "loss": 0.9059, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.9319079395342503e-05, + "loss": 0.7408, + "step": 750 + }, + { + "epoch": 0.03, + "learning_rate": 1.931000045394707e-05, + "loss": 0.8867, + "step": 760 + }, + { + "epoch": 0.03, + "learning_rate": 1.9300921512551638e-05, + "loss": 0.6566, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 1.9291842571156206e-05, + "loss": 0.7961, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 1.9282763629760773e-05, + "loss": 0.7789, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 1.9273684688365337e-05, + "loss": 0.9086, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 1.9264605746969905e-05, + "loss": 0.7547, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 1.9255526805574472e-05, + "loss": 0.7371, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 1.924644786417904e-05, + "loss": 0.6797, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 1.9237368922783604e-05, + "loss": 0.6729, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 1.922828998138817e-05, + "loss": 0.6578, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 1.921921103999274e-05, + "loss": 0.7986, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 1.9210132098597303e-05, + "loss": 0.691, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 1.920105315720187e-05, + "loss": 0.7988, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 1.919197421580644e-05, + "loss": 0.7607, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 1.9182895274411003e-05, + "loss": 0.6561, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 1.917381633301557e-05, + "loss": 0.71, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 1.9164737391620138e-05, + "loss": 0.7926, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 1.9155658450224705e-05, + "loss": 0.8121, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.9146579508829273e-05, + "loss": 0.6301, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 1.913750056743384e-05, + "loss": 0.6631, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 1.9128421626038404e-05, + "loss": 0.774, + "step": 960 + }, + { + "epoch": 0.04, + "learning_rate": 1.9119342684642972e-05, + "loss": 0.6559, + "step": 970 + }, + { + "epoch": 0.04, + "learning_rate": 1.911026374324754e-05, + "loss": 0.7158, + "step": 980 + }, + { + "epoch": 0.04, + "learning_rate": 1.9101184801852107e-05, + "loss": 0.8125, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 1.9092105860456675e-05, + "loss": 0.8473, + "step": 1000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.5444229840656688, + "eval_loss": 0.7574031949043274, + "eval_runtime": 73.6682, + "eval_samples_per_second": 56.225, + "eval_steps_per_second": 14.063, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 1.908302691906124e-05, + "loss": 0.6639, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 1.9073947977665806e-05, + "loss": 0.7871, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 1.9064869036270374e-05, + "loss": 0.651, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 1.9055790094874938e-05, + "loss": 0.7703, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 1.9046711153479506e-05, + "loss": 0.7203, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 1.9037632212084073e-05, + "loss": 0.7766, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 1.9028553270688637e-05, + "loss": 0.6488, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 1.9019474329293205e-05, + "loss": 0.9973, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 1.9010395387897772e-05, + "loss": 0.7613, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 1.900131644650234e-05, + "loss": 0.6891, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 1.8992237505106904e-05, + "loss": 0.6699, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 1.898315856371147e-05, + "loss": 0.6811, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 1.897407962231604e-05, + "loss": 0.8492, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 1.8965000680920607e-05, + "loss": 0.7957, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 1.8955921739525174e-05, + "loss": 0.759, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 1.894684279812974e-05, + "loss": 0.8492, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 1.8937763856734306e-05, + "loss": 0.7164, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 1.8928684915338873e-05, + "loss": 0.6832, + "step": 1180 + }, + { + "epoch": 0.05, + "learning_rate": 1.891960597394344e-05, + "loss": 0.793, + "step": 1190 + }, + { + "epoch": 0.05, + "learning_rate": 1.891052703254801e-05, + "loss": 0.725, + "step": 1200 + }, + { + "epoch": 0.05, + "learning_rate": 1.8901448091152573e-05, + "loss": 0.6547, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 1.889236914975714e-05, + "loss": 0.6254, + "step": 1220 + }, + { + "epoch": 0.06, + "learning_rate": 1.8883290208361708e-05, + "loss": 0.7918, + "step": 1230 + }, + { + "epoch": 0.06, + "learning_rate": 1.8874211266966272e-05, + "loss": 0.8402, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 1.886513232557084e-05, + "loss": 0.6934, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 1.8856053384175407e-05, + "loss": 0.691, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 1.884697444277997e-05, + "loss": 0.6693, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 1.883789550138454e-05, + "loss": 0.827, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 1.8828816559989106e-05, + "loss": 0.6457, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 1.8819737618593674e-05, + "loss": 0.6223, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 1.8810658677198238e-05, + "loss": 0.7271, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 1.8801579735802806e-05, + "loss": 0.85, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 1.8792500794407373e-05, + "loss": 0.7082, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 1.878342185301194e-05, + "loss": 0.7145, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 1.8774342911616508e-05, + "loss": 0.8406, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 1.8765263970221076e-05, + "loss": 0.6855, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 1.875618502882564e-05, + "loss": 0.6941, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 1.8747106087430207e-05, + "loss": 0.6742, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 1.8738027146034775e-05, + "loss": 0.6928, + "step": 1390 + }, + { + "epoch": 0.06, + "learning_rate": 1.8728948204639342e-05, + "loss": 0.7762, + "step": 1400 + }, + { + "epoch": 0.06, + "learning_rate": 1.8719869263243907e-05, + "loss": 0.8305, + "step": 1410 + }, + { + "epoch": 0.06, + "learning_rate": 1.8710790321848474e-05, + "loss": 0.6861, + "step": 1420 + }, + { + "epoch": 0.06, + "learning_rate": 1.8701711380453042e-05, + "loss": 0.7648, + "step": 1430 + }, + { + "epoch": 0.07, + "learning_rate": 1.8692632439057606e-05, + "loss": 0.7822, + "step": 1440 + }, + { + "epoch": 0.07, + "learning_rate": 1.8683553497662173e-05, + "loss": 0.7717, + "step": 1450 + }, + { + "epoch": 0.07, + "learning_rate": 1.867447455626674e-05, + "loss": 0.6504, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 1.8665395614871305e-05, + "loss": 0.7363, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 1.8656316673475873e-05, + "loss": 0.6836, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 1.864723773208044e-05, + "loss": 0.8811, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 1.8638158790685008e-05, + "loss": 0.7086, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.5596330275229358, + "eval_loss": 0.7176767587661743, + "eval_runtime": 74.531, + "eval_samples_per_second": 55.574, + "eval_steps_per_second": 13.9, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 1.8629079849289575e-05, + "loss": 0.6834, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 1.862000090789414e-05, + "loss": 0.7777, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 1.8610921966498707e-05, + "loss": 0.6586, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 1.8601843025103275e-05, + "loss": 0.7246, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 1.8592764083707842e-05, + "loss": 0.7203, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 1.858368514231241e-05, + "loss": 0.7113, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 1.8574606200916974e-05, + "loss": 0.7828, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 1.856552725952154e-05, + "loss": 0.7518, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 1.855644831812611e-05, + "loss": 0.693, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 1.8547369376730676e-05, + "loss": 0.7, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 1.853829043533524e-05, + "loss": 0.65, + "step": 1610 + }, + { + "epoch": 0.07, + "learning_rate": 1.8529211493939808e-05, + "loss": 0.6734, + "step": 1620 + }, + { + "epoch": 0.07, + "learning_rate": 1.8520132552544376e-05, + "loss": 0.7648, + "step": 1630 + }, + { + "epoch": 0.07, + "learning_rate": 1.851105361114894e-05, + "loss": 0.6742, + "step": 1640 + }, + { + "epoch": 0.07, + "learning_rate": 1.8501974669753507e-05, + "loss": 0.7734, + "step": 1650 + }, + { + "epoch": 0.08, + "learning_rate": 1.8492895728358075e-05, + "loss": 0.7621, + "step": 1660 + }, + { + "epoch": 0.08, + "learning_rate": 1.848381678696264e-05, + "loss": 0.6535, + "step": 1670 + }, + { + "epoch": 0.08, + "learning_rate": 1.8474737845567207e-05, + "loss": 0.7879, + "step": 1680 + }, + { + "epoch": 0.08, + "learning_rate": 1.8465658904171774e-05, + "loss": 0.6699, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 1.845657996277634e-05, + "loss": 0.6672, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 1.844750102138091e-05, + "loss": 0.759, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 1.8438422079985477e-05, + "loss": 0.7328, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 1.842934313859004e-05, + "loss": 0.7902, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 1.842026419719461e-05, + "loss": 0.7342, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 1.8411185255799176e-05, + "loss": 0.7047, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 1.8402106314403744e-05, + "loss": 0.7137, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 1.839302737300831e-05, + "loss": 0.6871, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 1.8383948431612875e-05, + "loss": 0.6461, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 1.8374869490217443e-05, + "loss": 0.7117, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 1.836579054882201e-05, + "loss": 0.7719, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 1.8356711607426574e-05, + "loss": 0.65, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 1.8347632666031142e-05, + "loss": 0.7473, + "step": 1820 + }, + { + "epoch": 0.08, + "learning_rate": 1.833855372463571e-05, + "loss": 0.7457, + "step": 1830 + }, + { + "epoch": 0.08, + "learning_rate": 1.8329474783240274e-05, + "loss": 0.7027, + "step": 1840 + }, + { + "epoch": 0.08, + "learning_rate": 1.832039584184484e-05, + "loss": 0.7066, + "step": 1850 + }, + { + "epoch": 0.08, + "learning_rate": 1.831131690044941e-05, + "loss": 0.7414, + "step": 1860 + }, + { + "epoch": 0.08, + "learning_rate": 1.8302237959053976e-05, + "loss": 0.7328, + "step": 1870 + }, + { + "epoch": 0.09, + "learning_rate": 1.829315901765854e-05, + "loss": 0.7215, + "step": 1880 + }, + { + "epoch": 0.09, + "learning_rate": 1.8284080076263108e-05, + "loss": 0.623, + "step": 1890 + }, + { + "epoch": 0.09, + "learning_rate": 1.8275001134867676e-05, + "loss": 0.7512, + "step": 1900 + }, + { + "epoch": 0.09, + "learning_rate": 1.8265922193472243e-05, + "loss": 0.7398, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 1.825684325207681e-05, + "loss": 0.7441, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 1.8247764310681375e-05, + "loss": 0.6504, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 1.8238685369285942e-05, + "loss": 0.7625, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 1.822960642789051e-05, + "loss": 0.668, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 1.8220527486495077e-05, + "loss": 0.6961, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 1.8211448545099645e-05, + "loss": 0.7477, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 1.820236960370421e-05, + "loss": 0.7844, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 1.8193290662308777e-05, + "loss": 0.6893, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 1.8184211720913344e-05, + "loss": 0.7223, + "step": 2000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.5642201834862385, + "eval_loss": 0.7053582072257996, + "eval_runtime": 73.9274, + "eval_samples_per_second": 56.028, + "eval_steps_per_second": 14.014, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 1.817513277951791e-05, + "loss": 0.6996, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 1.8166053838122476e-05, + "loss": 0.6939, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 1.8156974896727043e-05, + "loss": 0.7418, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 1.8147895955331608e-05, + "loss": 0.7295, + "step": 2040 + }, + { + "epoch": 0.09, + "learning_rate": 1.8138817013936175e-05, + "loss": 0.6215, + "step": 2050 + }, + { + "epoch": 0.09, + "learning_rate": 1.8129738072540743e-05, + "loss": 0.6334, + "step": 2060 + }, + { + "epoch": 0.09, + "learning_rate": 1.812065913114531e-05, + "loss": 0.7109, + "step": 2070 + }, + { + "epoch": 0.09, + "learning_rate": 1.8111580189749878e-05, + "loss": 0.6764, + "step": 2080 + }, + { + "epoch": 0.09, + "learning_rate": 1.8102501248354442e-05, + "loss": 0.6371, + "step": 2090 + }, + { + "epoch": 0.1, + "learning_rate": 1.809342230695901e-05, + "loss": 0.6674, + "step": 2100 + }, + { + "epoch": 0.1, + "learning_rate": 1.8084343365563577e-05, + "loss": 0.6689, + "step": 2110 + }, + { + "epoch": 0.1, + "learning_rate": 1.8075264424168145e-05, + "loss": 0.6523, + "step": 2120 + }, + { + "epoch": 0.1, + "learning_rate": 1.8066185482772712e-05, + "loss": 0.6008, + "step": 2130 + }, + { + "epoch": 0.1, + "learning_rate": 1.8057106541377276e-05, + "loss": 0.742, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 1.8048027599981844e-05, + "loss": 0.7527, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 1.803894865858641e-05, + "loss": 0.7352, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 1.802986971719098e-05, + "loss": 0.6502, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 1.8020790775795543e-05, + "loss": 0.7156, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 1.801171183440011e-05, + "loss": 0.7008, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 1.8002632893004678e-05, + "loss": 0.6953, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 1.7993553951609242e-05, + "loss": 0.7641, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 1.798447501021381e-05, + "loss": 0.7266, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 1.7975396068818377e-05, + "loss": 0.657, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 1.796631712742294e-05, + "loss": 0.6883, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 1.795723818602751e-05, + "loss": 0.699, + "step": 2250 + }, + { + "epoch": 0.1, + "learning_rate": 1.7948159244632077e-05, + "loss": 0.6229, + "step": 2260 + }, + { + "epoch": 0.1, + "learning_rate": 1.7939080303236644e-05, + "loss": 0.6969, + "step": 2270 + }, + { + "epoch": 0.1, + "learning_rate": 1.7930001361841212e-05, + "loss": 0.7721, + "step": 2280 + }, + { + "epoch": 0.1, + "learning_rate": 1.7920922420445776e-05, + "loss": 0.7125, + "step": 2290 + }, + { + "epoch": 0.1, + "learning_rate": 1.7911843479050343e-05, + "loss": 0.7691, + "step": 2300 + }, + { + "epoch": 0.1, + "learning_rate": 1.790276453765491e-05, + "loss": 0.7262, + "step": 2310 + }, + { + "epoch": 0.11, + "learning_rate": 1.789368559625948e-05, + "loss": 0.7363, + "step": 2320 + }, + { + "epoch": 0.11, + "learning_rate": 1.7884606654864046e-05, + "loss": 0.6568, + "step": 2330 + }, + { + "epoch": 0.11, + "learning_rate": 1.787552771346861e-05, + "loss": 0.6848, + "step": 2340 + }, + { + "epoch": 0.11, + "learning_rate": 1.7866448772073178e-05, + "loss": 0.623, + "step": 2350 + }, + { + "epoch": 0.11, + "learning_rate": 1.7857369830677745e-05, + "loss": 0.7602, + "step": 2360 + }, + { + "epoch": 0.11, + "learning_rate": 1.7848290889282313e-05, + "loss": 0.5975, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 1.7839211947886877e-05, + "loss": 0.6773, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 1.7830133006491445e-05, + "loss": 0.7605, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 1.7821054065096012e-05, + "loss": 0.7703, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 1.7811975123700576e-05, + "loss": 0.7121, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 1.7802896182305144e-05, + "loss": 0.7508, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 1.779381724090971e-05, + "loss": 0.6875, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 1.7784738299514275e-05, + "loss": 0.7035, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 1.7775659358118843e-05, + "loss": 0.7422, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 1.776658041672341e-05, + "loss": 0.6859, + "step": 2460 + }, + { + "epoch": 0.11, + "learning_rate": 1.7757501475327978e-05, + "loss": 0.7377, + "step": 2470 + }, + { + "epoch": 0.11, + "learning_rate": 1.7748422533932546e-05, + "loss": 0.7625, + "step": 2480 + }, + { + "epoch": 0.11, + "learning_rate": 1.7739343592537113e-05, + "loss": 0.6867, + "step": 2490 + }, + { + "epoch": 0.11, + "learning_rate": 1.7730264651141677e-05, + "loss": 0.7092, + "step": 2500 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.5864316755190729, + "eval_loss": 0.69435054063797, + "eval_runtime": 74.0387, + "eval_samples_per_second": 55.944, + "eval_steps_per_second": 13.993, + "step": 2500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7721185709746245e-05, + "loss": 0.7135, + "step": 2510 + }, + { + "epoch": 0.11, + "learning_rate": 1.7712106768350812e-05, + "loss": 0.6902, + "step": 2520 + }, + { + "epoch": 0.11, + "learning_rate": 1.770302782695538e-05, + "loss": 0.6463, + "step": 2530 + }, + { + "epoch": 0.12, + "learning_rate": 1.7693948885559948e-05, + "loss": 0.7086, + "step": 2540 + }, + { + "epoch": 0.12, + "learning_rate": 1.768486994416451e-05, + "loss": 0.743, + "step": 2550 + }, + { + "epoch": 0.12, + "learning_rate": 1.767579100276908e-05, + "loss": 0.727, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 1.7666712061373647e-05, + "loss": 0.7523, + "step": 2570 + }, + { + "epoch": 0.12, + "learning_rate": 1.765763311997821e-05, + "loss": 0.6906, + "step": 2580 + }, + { + "epoch": 0.12, + "learning_rate": 1.764855417858278e-05, + "loss": 0.7262, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 1.7639475237187346e-05, + "loss": 0.6633, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 1.763039629579191e-05, + "loss": 0.6121, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 1.7621317354396478e-05, + "loss": 0.6309, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 1.7612238413001045e-05, + "loss": 0.6721, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 1.7603159471605613e-05, + "loss": 0.6562, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 1.7594080530210177e-05, + "loss": 0.6633, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 1.7585001588814745e-05, + "loss": 0.7188, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 1.7575922647419312e-05, + "loss": 0.7297, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 1.756684370602388e-05, + "loss": 0.6438, + "step": 2680 + }, + { + "epoch": 0.12, + "learning_rate": 1.7557764764628447e-05, + "loss": 0.6961, + "step": 2690 + }, + { + "epoch": 0.12, + "learning_rate": 1.754868582323301e-05, + "loss": 0.6992, + "step": 2700 + }, + { + "epoch": 0.12, + "learning_rate": 1.753960688183758e-05, + "loss": 0.5748, + "step": 2710 + }, + { + "epoch": 0.12, + "learning_rate": 1.7530527940442146e-05, + "loss": 0.5975, + "step": 2720 + }, + { + "epoch": 0.12, + "learning_rate": 1.7521448999046714e-05, + "loss": 0.7008, + "step": 2730 + }, + { + "epoch": 0.12, + "learning_rate": 1.751237005765128e-05, + "loss": 0.8043, + "step": 2740 + }, + { + "epoch": 0.12, + "learning_rate": 1.7503291116255846e-05, + "loss": 0.7314, + "step": 2750 + }, + { + "epoch": 0.13, + "learning_rate": 1.7494212174860413e-05, + "loss": 0.6992, + "step": 2760 + }, + { + "epoch": 0.13, + "learning_rate": 1.748513323346498e-05, + "loss": 0.6832, + "step": 2770 + }, + { + "epoch": 0.13, + "learning_rate": 1.7476054292069545e-05, + "loss": 0.7281, + "step": 2780 + }, + { + "epoch": 0.13, + "learning_rate": 1.7466975350674112e-05, + "loss": 0.7684, + "step": 2790 + }, + { + "epoch": 0.13, + "learning_rate": 1.745789640927868e-05, + "loss": 0.693, + "step": 2800 + }, + { + "epoch": 0.13, + "learning_rate": 1.7448817467883244e-05, + "loss": 0.7365, + "step": 2810 + }, + { + "epoch": 0.13, + "learning_rate": 1.743973852648781e-05, + "loss": 0.6813, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 1.743065958509238e-05, + "loss": 0.6824, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 1.7421580643696947e-05, + "loss": 0.7217, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 1.7412501702301514e-05, + "loss": 0.7031, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 1.740342276090608e-05, + "loss": 0.7348, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 1.7394343819510646e-05, + "loss": 0.6928, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 1.7385264878115214e-05, + "loss": 0.7035, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 1.737618593671978e-05, + "loss": 0.6838, + "step": 2890 + }, + { + "epoch": 0.13, + "learning_rate": 1.736710699532435e-05, + "loss": 0.6586, + "step": 2900 + }, + { + "epoch": 0.13, + "learning_rate": 1.7358028053928913e-05, + "loss": 0.6711, + "step": 2910 + }, + { + "epoch": 0.13, + "learning_rate": 1.734894911253348e-05, + "loss": 0.693, + "step": 2920 + }, + { + "epoch": 0.13, + "learning_rate": 1.7339870171138048e-05, + "loss": 0.701, + "step": 2930 + }, + { + "epoch": 0.13, + "learning_rate": 1.7330791229742615e-05, + "loss": 0.598, + "step": 2940 + }, + { + "epoch": 0.13, + "learning_rate": 1.732171228834718e-05, + "loss": 0.673, + "step": 2950 + }, + { + "epoch": 0.13, + "learning_rate": 1.7312633346951747e-05, + "loss": 0.6502, + "step": 2960 + }, + { + "epoch": 0.13, + "learning_rate": 1.7303554405556315e-05, + "loss": 0.7301, + "step": 2970 + }, + { + "epoch": 0.14, + "learning_rate": 1.729447546416088e-05, + "loss": 0.6961, + "step": 2980 + }, + { + "epoch": 0.14, + "learning_rate": 1.7285396522765446e-05, + "loss": 0.7199, + "step": 2990 + }, + { + "epoch": 0.14, + "learning_rate": 1.7276317581370014e-05, + "loss": 0.6605, + "step": 3000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.5806373732496378, + "eval_loss": 0.7019612193107605, + "eval_runtime": 75.2508, + "eval_samples_per_second": 55.043, + "eval_steps_per_second": 13.767, + "step": 3000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7267238639974578e-05, + "loss": 0.6375, + "step": 3010 + }, + { + "epoch": 0.14, + "learning_rate": 1.7258159698579146e-05, + "loss": 0.7408, + "step": 3020 + }, + { + "epoch": 0.14, + "learning_rate": 1.7249080757183713e-05, + "loss": 0.6504, + "step": 3030 + }, + { + "epoch": 0.14, + "learning_rate": 1.724000181578828e-05, + "loss": 0.7404, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 1.7230922874392848e-05, + "loss": 0.7684, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 1.7221843932997412e-05, + "loss": 0.6043, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 1.721276499160198e-05, + "loss": 0.6562, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 1.7203686050206547e-05, + "loss": 0.7309, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 1.7194607108811115e-05, + "loss": 0.6188, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 1.7185528167415683e-05, + "loss": 0.767, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 1.7176449226020247e-05, + "loss": 0.7379, + "step": 3110 + }, + { + "epoch": 0.14, + "learning_rate": 1.7167370284624814e-05, + "loss": 0.732, + "step": 3120 + }, + { + "epoch": 0.14, + "learning_rate": 1.7158291343229382e-05, + "loss": 0.7145, + "step": 3130 + }, + { + "epoch": 0.14, + "learning_rate": 1.714921240183395e-05, + "loss": 0.7035, + "step": 3140 + }, + { + "epoch": 0.14, + "learning_rate": 1.7140133460438513e-05, + "loss": 0.759, + "step": 3150 + }, + { + "epoch": 0.14, + "learning_rate": 1.713105451904308e-05, + "loss": 0.7232, + "step": 3160 + }, + { + "epoch": 0.14, + "learning_rate": 1.712197557764765e-05, + "loss": 0.6621, + "step": 3170 + }, + { + "epoch": 0.14, + "learning_rate": 1.7112896636252213e-05, + "loss": 0.6104, + "step": 3180 + }, + { + "epoch": 0.14, + "learning_rate": 1.710381769485678e-05, + "loss": 0.7688, + "step": 3190 + }, + { + "epoch": 0.15, + "learning_rate": 1.7094738753461348e-05, + "loss": 0.6383, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 1.7085659812065915e-05, + "loss": 0.6279, + "step": 3210 + }, + { + "epoch": 0.15, + "learning_rate": 1.707658087067048e-05, + "loss": 0.8711, + "step": 3220 + }, + { + "epoch": 0.15, + "learning_rate": 1.7067501929275047e-05, + "loss": 0.7449, + "step": 3230 + }, + { + "epoch": 0.15, + "learning_rate": 1.7058422987879615e-05, + "loss": 0.6668, + "step": 3240 + }, + { + "epoch": 0.15, + "learning_rate": 1.7049344046484182e-05, + "loss": 0.6992, + "step": 3250 + }, + { + "epoch": 0.15, + "learning_rate": 1.704026510508875e-05, + "loss": 0.6529, + "step": 3260 + }, + { + "epoch": 0.15, + "learning_rate": 1.7031186163693314e-05, + "loss": 0.6301, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 1.702210722229788e-05, + "loss": 0.7, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 1.701302828090245e-05, + "loss": 0.7004, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 1.7003949339507016e-05, + "loss": 0.6744, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 1.6994870398111584e-05, + "loss": 0.6875, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 1.6985791456716148e-05, + "loss": 0.7172, + "step": 3320 + }, + { + "epoch": 0.15, + "learning_rate": 1.6976712515320716e-05, + "loss": 0.7213, + "step": 3330 + }, + { + "epoch": 0.15, + "learning_rate": 1.6967633573925283e-05, + "loss": 0.6945, + "step": 3340 + }, + { + "epoch": 0.15, + "learning_rate": 1.6958554632529847e-05, + "loss": 0.6578, + "step": 3350 + }, + { + "epoch": 0.15, + "learning_rate": 1.6949475691134415e-05, + "loss": 0.7834, + "step": 3360 + }, + { + "epoch": 0.15, + "learning_rate": 1.6940396749738982e-05, + "loss": 0.6871, + "step": 3370 + }, + { + "epoch": 0.15, + "learning_rate": 1.6931317808343547e-05, + "loss": 0.6965, + "step": 3380 + }, + { + "epoch": 0.15, + "learning_rate": 1.6922238866948114e-05, + "loss": 0.6832, + "step": 3390 + }, + { + "epoch": 0.15, + "learning_rate": 1.6913159925552682e-05, + "loss": 0.6949, + "step": 3400 + }, + { + "epoch": 0.15, + "learning_rate": 1.690408098415725e-05, + "loss": 0.7303, + "step": 3410 + }, + { + "epoch": 0.16, + "learning_rate": 1.6895002042761813e-05, + "loss": 0.7215, + "step": 3420 + }, + { + "epoch": 0.16, + "learning_rate": 1.688592310136638e-05, + "loss": 0.6664, + "step": 3430 + }, + { + "epoch": 0.16, + "learning_rate": 1.687684415997095e-05, + "loss": 0.7107, + "step": 3440 + }, + { + "epoch": 0.16, + "learning_rate": 1.6867765218575516e-05, + "loss": 0.7289, + "step": 3450 + }, + { + "epoch": 0.16, + "learning_rate": 1.6858686277180084e-05, + "loss": 0.6801, + "step": 3460 + }, + { + "epoch": 0.16, + "learning_rate": 1.6849607335784648e-05, + "loss": 0.6475, + "step": 3470 + }, + { + "epoch": 0.16, + "learning_rate": 1.6840528394389215e-05, + "loss": 0.6285, + "step": 3480 + }, + { + "epoch": 0.16, + "learning_rate": 1.6831449452993783e-05, + "loss": 0.6699, + "step": 3490 + }, + { + "epoch": 0.16, + "learning_rate": 1.682237051159835e-05, + "loss": 0.7424, + "step": 3500 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.5782230806373733, + "eval_loss": 0.6958387494087219, + "eval_runtime": 74.8034, + "eval_samples_per_second": 55.372, + "eval_steps_per_second": 13.85, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 1.6813291570202918e-05, + "loss": 0.676, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 1.6804212628807482e-05, + "loss": 0.5826, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 1.679513368741205e-05, + "loss": 0.7164, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 1.6786054746016617e-05, + "loss": 0.8461, + "step": 3540 + }, + { + "epoch": 0.16, + "learning_rate": 1.677697580462118e-05, + "loss": 0.7219, + "step": 3550 + }, + { + "epoch": 0.16, + "learning_rate": 1.676789686322575e-05, + "loss": 0.7793, + "step": 3560 + }, + { + "epoch": 0.16, + "learning_rate": 1.6758817921830316e-05, + "loss": 0.6773, + "step": 3570 + }, + { + "epoch": 0.16, + "learning_rate": 1.674973898043488e-05, + "loss": 0.6342, + "step": 3580 + }, + { + "epoch": 0.16, + "learning_rate": 1.6740660039039448e-05, + "loss": 0.6426, + "step": 3590 + }, + { + "epoch": 0.16, + "learning_rate": 1.6731581097644016e-05, + "loss": 0.8352, + "step": 3600 + }, + { + "epoch": 0.16, + "learning_rate": 1.6722502156248583e-05, + "loss": 0.673, + "step": 3610 + }, + { + "epoch": 0.16, + "learning_rate": 1.671342321485315e-05, + "loss": 0.6305, + "step": 3620 + }, + { + "epoch": 0.16, + "learning_rate": 1.6704344273457715e-05, + "loss": 0.6246, + "step": 3630 + }, + { + "epoch": 0.17, + "learning_rate": 1.6695265332062282e-05, + "loss": 0.6191, + "step": 3640 + }, + { + "epoch": 0.17, + "learning_rate": 1.668618639066685e-05, + "loss": 0.6887, + "step": 3650 + }, + { + "epoch": 0.17, + "learning_rate": 1.6677107449271418e-05, + "loss": 0.6418, + "step": 3660 + }, + { + "epoch": 0.17, + "learning_rate": 1.6668028507875985e-05, + "loss": 0.727, + "step": 3670 + }, + { + "epoch": 0.17, + "learning_rate": 1.665894956648055e-05, + "loss": 0.6848, + "step": 3680 + }, + { + "epoch": 0.17, + "learning_rate": 1.6649870625085117e-05, + "loss": 0.7105, + "step": 3690 + }, + { + "epoch": 0.17, + "learning_rate": 1.6640791683689684e-05, + "loss": 0.7939, + "step": 3700 + }, + { + "epoch": 0.17, + "learning_rate": 1.6631712742294252e-05, + "loss": 0.6701, + "step": 3710 + }, + { + "epoch": 0.17, + "learning_rate": 1.6622633800898816e-05, + "loss": 0.7344, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 1.6613554859503384e-05, + "loss": 0.6816, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 1.660447591810795e-05, + "loss": 0.6516, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 1.6595396976712515e-05, + "loss": 0.6641, + "step": 3750 + }, + { + "epoch": 0.17, + "learning_rate": 1.6586318035317083e-05, + "loss": 0.6396, + "step": 3760 + }, + { + "epoch": 0.17, + "learning_rate": 1.657723909392165e-05, + "loss": 0.6596, + "step": 3770 + }, + { + "epoch": 0.17, + "learning_rate": 1.6568160152526214e-05, + "loss": 0.6672, + "step": 3780 + }, + { + "epoch": 0.17, + "learning_rate": 1.6559081211130782e-05, + "loss": 0.7094, + "step": 3790 + }, + { + "epoch": 0.17, + "learning_rate": 1.655000226973535e-05, + "loss": 0.7512, + "step": 3800 + }, + { + "epoch": 0.17, + "learning_rate": 1.6540923328339917e-05, + "loss": 0.7082, + "step": 3810 + }, + { + "epoch": 0.17, + "learning_rate": 1.6531844386944485e-05, + "loss": 0.6738, + "step": 3820 + }, + { + "epoch": 0.17, + "learning_rate": 1.652276544554905e-05, + "loss": 0.659, + "step": 3830 + }, + { + "epoch": 0.17, + "learning_rate": 1.6513686504153616e-05, + "loss": 0.725, + "step": 3840 + }, + { + "epoch": 0.17, + "learning_rate": 1.6504607562758184e-05, + "loss": 0.6949, + "step": 3850 + }, + { + "epoch": 0.18, + "learning_rate": 1.649552862136275e-05, + "loss": 0.7621, + "step": 3860 + }, + { + "epoch": 0.18, + "learning_rate": 1.648644967996732e-05, + "loss": 0.5955, + "step": 3870 + }, + { + "epoch": 0.18, + "learning_rate": 1.6477370738571883e-05, + "loss": 0.7156, + "step": 3880 + }, + { + "epoch": 0.18, + "learning_rate": 1.646829179717645e-05, + "loss": 0.7133, + "step": 3890 + }, + { + "epoch": 0.18, + "learning_rate": 1.6459212855781018e-05, + "loss": 0.5943, + "step": 3900 + }, + { + "epoch": 0.18, + "learning_rate": 1.6450133914385586e-05, + "loss": 0.7309, + "step": 3910 + }, + { + "epoch": 0.18, + "learning_rate": 1.644105497299015e-05, + "loss": 0.6729, + "step": 3920 + }, + { + "epoch": 0.18, + "learning_rate": 1.6431976031594717e-05, + "loss": 0.6658, + "step": 3930 + }, + { + "epoch": 0.18, + "learning_rate": 1.6422897090199285e-05, + "loss": 0.592, + "step": 3940 + }, + { + "epoch": 0.18, + "learning_rate": 1.641381814880385e-05, + "loss": 0.6889, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 1.6404739207408417e-05, + "loss": 0.6473, + "step": 3960 + }, + { + "epoch": 0.18, + "learning_rate": 1.6395660266012984e-05, + "loss": 0.6818, + "step": 3970 + }, + { + "epoch": 0.18, + "learning_rate": 1.6386581324617552e-05, + "loss": 0.6381, + "step": 3980 + }, + { + "epoch": 0.18, + "learning_rate": 1.6377502383222116e-05, + "loss": 0.7305, + "step": 3990 + }, + { + "epoch": 0.18, + "learning_rate": 1.6368423441826683e-05, + "loss": 0.6549, + "step": 4000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.5782230806373733, + "eval_loss": 0.6909931898117065, + "eval_runtime": 72.1497, + "eval_samples_per_second": 57.408, + "eval_steps_per_second": 14.359, + "step": 4000 + }, + { + "epoch": 0.18, + "learning_rate": 1.635934450043125e-05, + "loss": 0.715, + "step": 4010 + }, + { + "epoch": 0.18, + "learning_rate": 1.635026555903582e-05, + "loss": 0.668, + "step": 4020 + }, + { + "epoch": 0.18, + "learning_rate": 1.6341186617640386e-05, + "loss": 0.6637, + "step": 4030 + }, + { + "epoch": 0.18, + "learning_rate": 1.633210767624495e-05, + "loss": 0.6697, + "step": 4040 + }, + { + "epoch": 0.18, + "learning_rate": 1.6323028734849518e-05, + "loss": 0.5869, + "step": 4050 + }, + { + "epoch": 0.18, + "learning_rate": 1.6313949793454085e-05, + "loss": 0.6004, + "step": 4060 + }, + { + "epoch": 0.18, + "learning_rate": 1.6304870852058653e-05, + "loss": 0.775, + "step": 4070 + }, + { + "epoch": 0.19, + "learning_rate": 1.629579191066322e-05, + "loss": 0.6416, + "step": 4080 + }, + { + "epoch": 0.19, + "learning_rate": 1.6286712969267785e-05, + "loss": 0.641, + "step": 4090 + }, + { + "epoch": 0.19, + "learning_rate": 1.6277634027872352e-05, + "loss": 0.7406, + "step": 4100 + }, + { + "epoch": 0.19, + "learning_rate": 1.626855508647692e-05, + "loss": 0.6365, + "step": 4110 + }, + { + "epoch": 0.19, + "learning_rate": 1.6259476145081484e-05, + "loss": 0.6926, + "step": 4120 + }, + { + "epoch": 0.19, + "learning_rate": 1.625039720368605e-05, + "loss": 0.6426, + "step": 4130 + }, + { + "epoch": 0.19, + "learning_rate": 1.624131826229062e-05, + "loss": 0.7107, + "step": 4140 + }, + { + "epoch": 0.19, + "learning_rate": 1.6232239320895183e-05, + "loss": 0.65, + "step": 4150 + }, + { + "epoch": 0.19, + "learning_rate": 1.622316037949975e-05, + "loss": 0.7195, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 1.6214081438104318e-05, + "loss": 0.5832, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 1.6205002496708886e-05, + "loss": 0.7441, + "step": 4180 + }, + { + "epoch": 0.19, + "learning_rate": 1.619592355531345e-05, + "loss": 0.627, + "step": 4190 + }, + { + "epoch": 0.19, + "learning_rate": 1.6186844613918017e-05, + "loss": 0.6598, + "step": 4200 + }, + { + "epoch": 0.19, + "learning_rate": 1.6177765672522585e-05, + "loss": 0.6814, + "step": 4210 + }, + { + "epoch": 0.19, + "learning_rate": 1.6168686731127152e-05, + "loss": 0.7156, + "step": 4220 + }, + { + "epoch": 0.19, + "learning_rate": 1.615960778973172e-05, + "loss": 0.6285, + "step": 4230 + }, + { + "epoch": 0.19, + "learning_rate": 1.6150528848336284e-05, + "loss": 0.7143, + "step": 4240 + }, + { + "epoch": 0.19, + "learning_rate": 1.6141449906940852e-05, + "loss": 0.6299, + "step": 4250 + }, + { + "epoch": 0.19, + "learning_rate": 1.613237096554542e-05, + "loss": 0.6852, + "step": 4260 + }, + { + "epoch": 0.19, + "learning_rate": 1.6123292024149987e-05, + "loss": 0.7059, + "step": 4270 + }, + { + "epoch": 0.19, + "learning_rate": 1.6114213082754554e-05, + "loss": 0.6936, + "step": 4280 + }, + { + "epoch": 0.19, + "learning_rate": 1.610513414135912e-05, + "loss": 0.5746, + "step": 4290 + }, + { + "epoch": 0.2, + "learning_rate": 1.6096055199963686e-05, + "loss": 0.7225, + "step": 4300 + }, + { + "epoch": 0.2, + "learning_rate": 1.6086976258568254e-05, + "loss": 0.6955, + "step": 4310 + }, + { + "epoch": 0.2, + "learning_rate": 1.6077897317172818e-05, + "loss": 0.7551, + "step": 4320 + }, + { + "epoch": 0.2, + "learning_rate": 1.6068818375777385e-05, + "loss": 0.6752, + "step": 4330 + }, + { + "epoch": 0.2, + "learning_rate": 1.6059739434381953e-05, + "loss": 0.7883, + "step": 4340 + }, + { + "epoch": 0.2, + "learning_rate": 1.6050660492986517e-05, + "loss": 0.7078, + "step": 4350 + }, + { + "epoch": 0.2, + "learning_rate": 1.6041581551591085e-05, + "loss": 0.8699, + "step": 4360 + }, + { + "epoch": 0.2, + "learning_rate": 1.6032502610195652e-05, + "loss": 0.6383, + "step": 4370 + }, + { + "epoch": 0.2, + "learning_rate": 1.602342366880022e-05, + "loss": 0.6402, + "step": 4380 + }, + { + "epoch": 0.2, + "learning_rate": 1.6014344727404787e-05, + "loss": 0.6916, + "step": 4390 + }, + { + "epoch": 0.2, + "learning_rate": 1.600526578600935e-05, + "loss": 0.6859, + "step": 4400 + }, + { + "epoch": 0.2, + "learning_rate": 1.599618684461392e-05, + "loss": 0.6668, + "step": 4410 + }, + { + "epoch": 0.2, + "learning_rate": 1.5987107903218486e-05, + "loss": 0.7312, + "step": 4420 + }, + { + "epoch": 0.2, + "learning_rate": 1.5978028961823054e-05, + "loss": 0.6523, + "step": 4430 + }, + { + "epoch": 0.2, + "learning_rate": 1.596895002042762e-05, + "loss": 0.6686, + "step": 4440 + }, + { + "epoch": 0.2, + "learning_rate": 1.5959871079032186e-05, + "loss": 0.7184, + "step": 4450 + }, + { + "epoch": 0.2, + "learning_rate": 1.5950792137636753e-05, + "loss": 0.65, + "step": 4460 + }, + { + "epoch": 0.2, + "learning_rate": 1.594171319624132e-05, + "loss": 0.7178, + "step": 4470 + }, + { + "epoch": 0.2, + "learning_rate": 1.5932634254845888e-05, + "loss": 0.6918, + "step": 4480 + }, + { + "epoch": 0.2, + "learning_rate": 1.5923555313450452e-05, + "loss": 0.5949, + "step": 4490 + }, + { + "epoch": 0.2, + "learning_rate": 1.591447637205502e-05, + "loss": 0.6268, + "step": 4500 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.5929502655721873, + "eval_loss": 0.6875754594802856, + "eval_runtime": 74.4346, + "eval_samples_per_second": 55.646, + "eval_steps_per_second": 13.918, + "step": 4500 + }, + { + "epoch": 0.2, + "learning_rate": 1.5905397430659588e-05, + "loss": 0.6125, + "step": 4510 + }, + { + "epoch": 0.21, + "learning_rate": 1.589631848926415e-05, + "loss": 0.8043, + "step": 4520 + }, + { + "epoch": 0.21, + "learning_rate": 1.588723954786872e-05, + "loss": 0.6482, + "step": 4530 + }, + { + "epoch": 0.21, + "learning_rate": 1.5878160606473287e-05, + "loss": 0.7039, + "step": 4540 + }, + { + "epoch": 0.21, + "learning_rate": 1.586908166507785e-05, + "loss": 0.6203, + "step": 4550 + }, + { + "epoch": 0.21, + "learning_rate": 1.586000272368242e-05, + "loss": 0.6557, + "step": 4560 + }, + { + "epoch": 0.21, + "learning_rate": 1.5850923782286986e-05, + "loss": 0.6105, + "step": 4570 + }, + { + "epoch": 0.21, + "learning_rate": 1.5841844840891554e-05, + "loss": 0.8234, + "step": 4580 + }, + { + "epoch": 0.21, + "learning_rate": 1.583276589949612e-05, + "loss": 0.6199, + "step": 4590 + }, + { + "epoch": 0.21, + "learning_rate": 1.5823686958100685e-05, + "loss": 0.675, + "step": 4600 + }, + { + "epoch": 0.21, + "learning_rate": 1.5814608016705253e-05, + "loss": 0.6316, + "step": 4610 + }, + { + "epoch": 0.21, + "learning_rate": 1.580552907530982e-05, + "loss": 0.7779, + "step": 4620 + }, + { + "epoch": 0.21, + "learning_rate": 1.5796450133914388e-05, + "loss": 0.5781, + "step": 4630 + }, + { + "epoch": 0.21, + "learning_rate": 1.5787371192518955e-05, + "loss": 0.7367, + "step": 4640 + }, + { + "epoch": 0.21, + "learning_rate": 1.577829225112352e-05, + "loss": 0.7154, + "step": 4650 + }, + { + "epoch": 0.21, + "learning_rate": 1.5769213309728087e-05, + "loss": 0.7148, + "step": 4660 + }, + { + "epoch": 0.21, + "learning_rate": 1.5760134368332655e-05, + "loss": 0.6227, + "step": 4670 + }, + { + "epoch": 0.21, + "learning_rate": 1.5751055426937222e-05, + "loss": 0.5682, + "step": 4680 + }, + { + "epoch": 0.21, + "learning_rate": 1.5741976485541786e-05, + "loss": 0.6184, + "step": 4690 + }, + { + "epoch": 0.21, + "learning_rate": 1.5732897544146354e-05, + "loss": 0.6588, + "step": 4700 + }, + { + "epoch": 0.21, + "learning_rate": 1.572381860275092e-05, + "loss": 0.6627, + "step": 4710 + }, + { + "epoch": 0.21, + "learning_rate": 1.5714739661355486e-05, + "loss": 0.6479, + "step": 4720 + }, + { + "epoch": 0.21, + "learning_rate": 1.5705660719960053e-05, + "loss": 0.7141, + "step": 4730 + }, + { + "epoch": 0.22, + "learning_rate": 1.569658177856462e-05, + "loss": 0.6545, + "step": 4740 + }, + { + "epoch": 0.22, + "learning_rate": 1.5687502837169188e-05, + "loss": 0.6789, + "step": 4750 + }, + { + "epoch": 0.22, + "learning_rate": 1.5678423895773752e-05, + "loss": 0.6906, + "step": 4760 + }, + { + "epoch": 0.22, + "learning_rate": 1.566934495437832e-05, + "loss": 0.7527, + "step": 4770 + }, + { + "epoch": 0.22, + "learning_rate": 1.5660266012982887e-05, + "loss": 0.599, + "step": 4780 + }, + { + "epoch": 0.22, + "learning_rate": 1.5651187071587455e-05, + "loss": 0.7096, + "step": 4790 + }, + { + "epoch": 0.22, + "learning_rate": 1.5642108130192023e-05, + "loss": 0.6578, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 1.5633029188796587e-05, + "loss": 0.6953, + "step": 4810 + }, + { + "epoch": 0.22, + "learning_rate": 1.5623950247401154e-05, + "loss": 0.7516, + "step": 4820 + }, + { + "epoch": 0.22, + "learning_rate": 1.5614871306005722e-05, + "loss": 0.6023, + "step": 4830 + }, + { + "epoch": 0.22, + "learning_rate": 1.560579236461029e-05, + "loss": 0.6684, + "step": 4840 + }, + { + "epoch": 0.22, + "learning_rate": 1.5596713423214857e-05, + "loss": 0.748, + "step": 4850 + }, + { + "epoch": 0.22, + "learning_rate": 1.558763448181942e-05, + "loss": 0.6287, + "step": 4860 + }, + { + "epoch": 0.22, + "learning_rate": 1.557855554042399e-05, + "loss": 0.7781, + "step": 4870 + }, + { + "epoch": 0.22, + "learning_rate": 1.5569476599028556e-05, + "loss": 0.7213, + "step": 4880 + }, + { + "epoch": 0.22, + "learning_rate": 1.556039765763312e-05, + "loss": 0.673, + "step": 4890 + }, + { + "epoch": 0.22, + "learning_rate": 1.5551318716237688e-05, + "loss": 0.8391, + "step": 4900 + }, + { + "epoch": 0.22, + "learning_rate": 1.5542239774842255e-05, + "loss": 0.7537, + "step": 4910 + }, + { + "epoch": 0.22, + "learning_rate": 1.553316083344682e-05, + "loss": 0.6406, + "step": 4920 + }, + { + "epoch": 0.22, + "learning_rate": 1.5524081892051387e-05, + "loss": 0.6082, + "step": 4930 + }, + { + "epoch": 0.22, + "learning_rate": 1.5515002950655955e-05, + "loss": 0.7135, + "step": 4940 + }, + { + "epoch": 0.22, + "learning_rate": 1.5505924009260522e-05, + "loss": 0.6555, + "step": 4950 + }, + { + "epoch": 0.23, + "learning_rate": 1.5496845067865086e-05, + "loss": 0.6604, + "step": 4960 + }, + { + "epoch": 0.23, + "learning_rate": 1.5487766126469654e-05, + "loss": 0.7117, + "step": 4970 + }, + { + "epoch": 0.23, + "learning_rate": 1.547868718507422e-05, + "loss": 0.7215, + "step": 4980 + }, + { + "epoch": 0.23, + "learning_rate": 1.546960824367879e-05, + "loss": 0.6865, + "step": 4990 + }, + { + "epoch": 0.23, + "learning_rate": 1.5460529302283356e-05, + "loss": 0.7523, + "step": 5000 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.59753742153549, + "eval_loss": 0.6818556189537048, + "eval_runtime": 74.991, + "eval_samples_per_second": 55.233, + "eval_steps_per_second": 13.815, + "step": 5000 + }, + { + "epoch": 0.23, + "learning_rate": 1.545145036088792e-05, + "loss": 0.7436, + "step": 5010 + }, + { + "epoch": 0.23, + "learning_rate": 1.5442371419492488e-05, + "loss": 0.7418, + "step": 5020 + }, + { + "epoch": 0.23, + "learning_rate": 1.5433292478097056e-05, + "loss": 0.6656, + "step": 5030 + }, + { + "epoch": 0.23, + "learning_rate": 1.5424213536701623e-05, + "loss": 0.7031, + "step": 5040 + }, + { + "epoch": 0.23, + "learning_rate": 1.541513459530619e-05, + "loss": 0.6496, + "step": 5050 + }, + { + "epoch": 0.23, + "learning_rate": 1.5406055653910755e-05, + "loss": 0.7084, + "step": 5060 + }, + { + "epoch": 0.23, + "learning_rate": 1.5396976712515323e-05, + "loss": 0.6781, + "step": 5070 + }, + { + "epoch": 0.23, + "learning_rate": 1.538789777111989e-05, + "loss": 0.7336, + "step": 5080 + }, + { + "epoch": 0.23, + "learning_rate": 1.5378818829724454e-05, + "loss": 0.6762, + "step": 5090 + }, + { + "epoch": 0.23, + "learning_rate": 1.5369739888329022e-05, + "loss": 0.6391, + "step": 5100 + }, + { + "epoch": 0.23, + "learning_rate": 1.536066094693359e-05, + "loss": 0.7719, + "step": 5110 + }, + { + "epoch": 0.23, + "learning_rate": 1.5351582005538153e-05, + "loss": 0.6418, + "step": 5120 + }, + { + "epoch": 0.23, + "learning_rate": 1.534250306414272e-05, + "loss": 0.6543, + "step": 5130 + }, + { + "epoch": 0.23, + "learning_rate": 1.533342412274729e-05, + "loss": 0.6639, + "step": 5140 + }, + { + "epoch": 0.23, + "learning_rate": 1.5324345181351856e-05, + "loss": 0.6256, + "step": 5150 + }, + { + "epoch": 0.23, + "learning_rate": 1.5315266239956424e-05, + "loss": 0.6721, + "step": 5160 + }, + { + "epoch": 0.23, + "learning_rate": 1.5306187298560988e-05, + "loss": 0.7191, + "step": 5170 + }, + { + "epoch": 0.24, + "learning_rate": 1.5297108357165555e-05, + "loss": 0.7355, + "step": 5180 + }, + { + "epoch": 0.24, + "learning_rate": 1.5288029415770123e-05, + "loss": 0.6605, + "step": 5190 + }, + { + "epoch": 0.24, + "learning_rate": 1.527895047437469e-05, + "loss": 0.6436, + "step": 5200 + }, + { + "epoch": 0.24, + "learning_rate": 1.5269871532979258e-05, + "loss": 0.6848, + "step": 5210 + }, + { + "epoch": 0.24, + "learning_rate": 1.5260792591583822e-05, + "loss": 0.7281, + "step": 5220 + }, + { + "epoch": 0.24, + "learning_rate": 1.525171365018839e-05, + "loss": 0.6137, + "step": 5230 + }, + { + "epoch": 0.24, + "learning_rate": 1.5242634708792955e-05, + "loss": 0.7535, + "step": 5240 + }, + { + "epoch": 0.24, + "learning_rate": 1.5233555767397523e-05, + "loss": 0.7076, + "step": 5250 + }, + { + "epoch": 0.24, + "learning_rate": 1.522447682600209e-05, + "loss": 0.6793, + "step": 5260 + }, + { + "epoch": 0.24, + "learning_rate": 1.5215397884606655e-05, + "loss": 0.7406, + "step": 5270 + }, + { + "epoch": 0.24, + "learning_rate": 1.5206318943211222e-05, + "loss": 0.7, + "step": 5280 + }, + { + "epoch": 0.24, + "learning_rate": 1.519724000181579e-05, + "loss": 0.6547, + "step": 5290 + }, + { + "epoch": 0.24, + "learning_rate": 1.5188161060420357e-05, + "loss": 0.6844, + "step": 5300 + }, + { + "epoch": 0.24, + "learning_rate": 1.5179082119024923e-05, + "loss": 0.6543, + "step": 5310 + }, + { + "epoch": 0.24, + "learning_rate": 1.5170003177629489e-05, + "loss": 0.7021, + "step": 5320 + }, + { + "epoch": 0.24, + "learning_rate": 1.5160924236234057e-05, + "loss": 0.6594, + "step": 5330 + }, + { + "epoch": 0.24, + "learning_rate": 1.5151845294838622e-05, + "loss": 0.7711, + "step": 5340 + }, + { + "epoch": 0.24, + "learning_rate": 1.514276635344319e-05, + "loss": 0.6973, + "step": 5350 + }, + { + "epoch": 0.24, + "learning_rate": 1.5133687412047758e-05, + "loss": 0.6928, + "step": 5360 + }, + { + "epoch": 0.24, + "learning_rate": 1.5124608470652322e-05, + "loss": 0.7063, + "step": 5370 + }, + { + "epoch": 0.24, + "learning_rate": 1.511552952925689e-05, + "loss": 0.6875, + "step": 5380 + }, + { + "epoch": 0.24, + "learning_rate": 1.5106450587861457e-05, + "loss": 0.65, + "step": 5390 + }, + { + "epoch": 0.25, + "learning_rate": 1.5097371646466024e-05, + "loss": 0.6594, + "step": 5400 + }, + { + "epoch": 0.25, + "learning_rate": 1.508829270507059e-05, + "loss": 0.6527, + "step": 5410 + }, + { + "epoch": 0.25, + "learning_rate": 1.5079213763675156e-05, + "loss": 0.6918, + "step": 5420 + }, + { + "epoch": 0.25, + "learning_rate": 1.5070134822279724e-05, + "loss": 0.7523, + "step": 5430 + }, + { + "epoch": 0.25, + "learning_rate": 1.506105588088429e-05, + "loss": 0.6365, + "step": 5440 + }, + { + "epoch": 0.25, + "learning_rate": 1.5051976939488857e-05, + "loss": 0.7805, + "step": 5450 + }, + { + "epoch": 0.25, + "learning_rate": 1.5042897998093424e-05, + "loss": 0.6734, + "step": 5460 + }, + { + "epoch": 0.25, + "learning_rate": 1.5033819056697992e-05, + "loss": 0.748, + "step": 5470 + }, + { + "epoch": 0.25, + "learning_rate": 1.5024740115302556e-05, + "loss": 0.6609, + "step": 5480 + }, + { + "epoch": 0.25, + "learning_rate": 1.5015661173907124e-05, + "loss": 0.6475, + "step": 5490 + }, + { + "epoch": 0.25, + "learning_rate": 1.5006582232511691e-05, + "loss": 0.6582, + "step": 5500 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.6028488652824723, + "eval_loss": 0.6788424849510193, + "eval_runtime": 72.2831, + "eval_samples_per_second": 57.302, + "eval_steps_per_second": 14.333, + "step": 5500 + }, + { + "epoch": 0.25, + "learning_rate": 1.4997503291116257e-05, + "loss": 0.6637, + "step": 5510 + }, + { + "epoch": 0.25, + "learning_rate": 1.4988424349720825e-05, + "loss": 0.6816, + "step": 5520 + }, + { + "epoch": 0.25, + "learning_rate": 1.497934540832539e-05, + "loss": 0.6617, + "step": 5530 + }, + { + "epoch": 0.25, + "learning_rate": 1.4970266466929956e-05, + "loss": 0.6871, + "step": 5540 + }, + { + "epoch": 0.25, + "learning_rate": 1.4961187525534524e-05, + "loss": 0.6793, + "step": 5550 + }, + { + "epoch": 0.25, + "learning_rate": 1.4952108584139091e-05, + "loss": 0.6922, + "step": 5560 + }, + { + "epoch": 0.25, + "learning_rate": 1.4943029642743659e-05, + "loss": 0.5648, + "step": 5570 + }, + { + "epoch": 0.25, + "learning_rate": 1.4933950701348223e-05, + "loss": 0.6439, + "step": 5580 + }, + { + "epoch": 0.25, + "learning_rate": 1.492487175995279e-05, + "loss": 0.7004, + "step": 5590 + }, + { + "epoch": 0.25, + "learning_rate": 1.4915792818557358e-05, + "loss": 0.8055, + "step": 5600 + }, + { + "epoch": 0.25, + "learning_rate": 1.4906713877161924e-05, + "loss": 0.5594, + "step": 5610 + }, + { + "epoch": 0.26, + "learning_rate": 1.4897634935766492e-05, + "loss": 0.8199, + "step": 5620 + }, + { + "epoch": 0.26, + "learning_rate": 1.4888555994371057e-05, + "loss": 0.6451, + "step": 5630 + }, + { + "epoch": 0.26, + "learning_rate": 1.4879477052975623e-05, + "loss": 0.6434, + "step": 5640 + }, + { + "epoch": 0.26, + "learning_rate": 1.4870398111580191e-05, + "loss": 0.6396, + "step": 5650 + }, + { + "epoch": 0.26, + "learning_rate": 1.4861319170184758e-05, + "loss": 0.5916, + "step": 5660 + }, + { + "epoch": 0.26, + "learning_rate": 1.4852240228789326e-05, + "loss": 0.7096, + "step": 5670 + }, + { + "epoch": 0.26, + "learning_rate": 1.484316128739389e-05, + "loss": 0.6758, + "step": 5680 + }, + { + "epoch": 0.26, + "learning_rate": 1.4834082345998458e-05, + "loss": 0.6438, + "step": 5690 + }, + { + "epoch": 0.26, + "learning_rate": 1.4825003404603025e-05, + "loss": 0.6777, + "step": 5700 + }, + { + "epoch": 0.26, + "learning_rate": 1.4815924463207591e-05, + "loss": 0.7172, + "step": 5710 + }, + { + "epoch": 0.26, + "learning_rate": 1.4806845521812159e-05, + "loss": 0.7588, + "step": 5720 + }, + { + "epoch": 0.26, + "learning_rate": 1.4797766580416724e-05, + "loss": 0.651, + "step": 5730 + }, + { + "epoch": 0.26, + "learning_rate": 1.478868763902129e-05, + "loss": 0.6287, + "step": 5740 + }, + { + "epoch": 0.26, + "learning_rate": 1.4779608697625858e-05, + "loss": 0.6875, + "step": 5750 + }, + { + "epoch": 0.26, + "learning_rate": 1.4770529756230425e-05, + "loss": 0.6582, + "step": 5760 + }, + { + "epoch": 0.26, + "learning_rate": 1.4761450814834993e-05, + "loss": 0.6656, + "step": 5770 + }, + { + "epoch": 0.26, + "learning_rate": 1.4752371873439557e-05, + "loss": 0.6037, + "step": 5780 + }, + { + "epoch": 0.26, + "learning_rate": 1.4743292932044125e-05, + "loss": 0.6973, + "step": 5790 + }, + { + "epoch": 0.26, + "learning_rate": 1.4734213990648692e-05, + "loss": 0.6377, + "step": 5800 + }, + { + "epoch": 0.26, + "learning_rate": 1.4725135049253258e-05, + "loss": 0.7617, + "step": 5810 + }, + { + "epoch": 0.26, + "learning_rate": 1.4716056107857826e-05, + "loss": 0.6621, + "step": 5820 + }, + { + "epoch": 0.26, + "learning_rate": 1.4706977166462391e-05, + "loss": 0.6861, + "step": 5830 + }, + { + "epoch": 0.27, + "learning_rate": 1.4697898225066957e-05, + "loss": 0.6504, + "step": 5840 + }, + { + "epoch": 0.27, + "learning_rate": 1.4688819283671525e-05, + "loss": 0.7766, + "step": 5850 + }, + { + "epoch": 0.27, + "learning_rate": 1.4679740342276092e-05, + "loss": 0.6512, + "step": 5860 + }, + { + "epoch": 0.27, + "learning_rate": 1.467066140088066e-05, + "loss": 0.698, + "step": 5870 + }, + { + "epoch": 0.27, + "learning_rate": 1.4661582459485226e-05, + "loss": 0.5723, + "step": 5880 + }, + { + "epoch": 0.27, + "learning_rate": 1.4652503518089792e-05, + "loss": 0.6986, + "step": 5890 + }, + { + "epoch": 0.27, + "learning_rate": 1.4643424576694359e-05, + "loss": 0.6633, + "step": 5900 + }, + { + "epoch": 0.27, + "learning_rate": 1.4634345635298925e-05, + "loss": 0.5979, + "step": 5910 + }, + { + "epoch": 0.27, + "learning_rate": 1.4625266693903493e-05, + "loss": 0.6746, + "step": 5920 + }, + { + "epoch": 0.27, + "learning_rate": 1.461618775250806e-05, + "loss": 0.7547, + "step": 5930 + }, + { + "epoch": 0.27, + "learning_rate": 1.4607108811112624e-05, + "loss": 0.7641, + "step": 5940 + }, + { + "epoch": 0.27, + "learning_rate": 1.4598029869717192e-05, + "loss": 0.6393, + "step": 5950 + }, + { + "epoch": 0.27, + "learning_rate": 1.458895092832176e-05, + "loss": 0.6977, + "step": 5960 + }, + { + "epoch": 0.27, + "learning_rate": 1.4579871986926327e-05, + "loss": 0.6777, + "step": 5970 + }, + { + "epoch": 0.27, + "learning_rate": 1.4570793045530893e-05, + "loss": 0.6963, + "step": 5980 + }, + { + "epoch": 0.27, + "learning_rate": 1.4561714104135459e-05, + "loss": 0.6664, + "step": 5990 + }, + { + "epoch": 0.27, + "learning_rate": 1.4552635162740026e-05, + "loss": 0.5879, + "step": 6000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.6057460164171897, + "eval_loss": 0.6761283278465271, + "eval_runtime": 72.9085, + "eval_samples_per_second": 56.811, + "eval_steps_per_second": 14.21, + "step": 6000 + }, + { + "epoch": 0.27, + "learning_rate": 1.4543556221344592e-05, + "loss": 0.6695, + "step": 6010 + }, + { + "epoch": 0.27, + "learning_rate": 1.453447727994916e-05, + "loss": 0.5879, + "step": 6020 + }, + { + "epoch": 0.27, + "learning_rate": 1.4525398338553727e-05, + "loss": 0.7484, + "step": 6030 + }, + { + "epoch": 0.27, + "learning_rate": 1.4516319397158291e-05, + "loss": 0.6438, + "step": 6040 + }, + { + "epoch": 0.27, + "learning_rate": 1.4507240455762859e-05, + "loss": 0.6422, + "step": 6050 + }, + { + "epoch": 0.28, + "learning_rate": 1.4498161514367426e-05, + "loss": 0.718, + "step": 6060 + }, + { + "epoch": 0.28, + "learning_rate": 1.4489082572971994e-05, + "loss": 0.6305, + "step": 6070 + }, + { + "epoch": 0.28, + "learning_rate": 1.448000363157656e-05, + "loss": 0.6605, + "step": 6080 + }, + { + "epoch": 0.28, + "learning_rate": 1.4470924690181126e-05, + "loss": 0.6598, + "step": 6090 + }, + { + "epoch": 0.28, + "learning_rate": 1.4461845748785693e-05, + "loss": 0.7359, + "step": 6100 + }, + { + "epoch": 0.28, + "learning_rate": 1.4452766807390259e-05, + "loss": 0.7461, + "step": 6110 + }, + { + "epoch": 0.28, + "learning_rate": 1.4443687865994826e-05, + "loss": 0.5742, + "step": 6120 + }, + { + "epoch": 0.28, + "learning_rate": 1.4434608924599394e-05, + "loss": 0.6814, + "step": 6130 + }, + { + "epoch": 0.28, + "learning_rate": 1.4425529983203958e-05, + "loss": 0.6605, + "step": 6140 + }, + { + "epoch": 0.28, + "learning_rate": 1.4416451041808526e-05, + "loss": 0.6927, + "step": 6150 + }, + { + "epoch": 0.28, + "learning_rate": 1.4407372100413093e-05, + "loss": 0.6391, + "step": 6160 + }, + { + "epoch": 0.28, + "learning_rate": 1.439829315901766e-05, + "loss": 0.6, + "step": 6170 + }, + { + "epoch": 0.28, + "learning_rate": 1.4389214217622227e-05, + "loss": 0.6977, + "step": 6180 + }, + { + "epoch": 0.28, + "learning_rate": 1.4380135276226792e-05, + "loss": 0.6307, + "step": 6190 + }, + { + "epoch": 0.28, + "learning_rate": 1.437105633483136e-05, + "loss": 0.642, + "step": 6200 + }, + { + "epoch": 0.28, + "learning_rate": 1.4361977393435926e-05, + "loss": 0.6969, + "step": 6210 + }, + { + "epoch": 0.28, + "learning_rate": 1.4352898452040493e-05, + "loss": 0.6883, + "step": 6220 + }, + { + "epoch": 0.28, + "learning_rate": 1.4343819510645061e-05, + "loss": 0.6189, + "step": 6230 + }, + { + "epoch": 0.28, + "learning_rate": 1.4334740569249628e-05, + "loss": 0.7242, + "step": 6240 + }, + { + "epoch": 0.28, + "learning_rate": 1.4325661627854193e-05, + "loss": 0.7031, + "step": 6250 + }, + { + "epoch": 0.28, + "learning_rate": 1.431658268645876e-05, + "loss": 0.6645, + "step": 6260 + }, + { + "epoch": 0.28, + "learning_rate": 1.4307503745063328e-05, + "loss": 0.6244, + "step": 6270 + }, + { + "epoch": 0.29, + "learning_rate": 1.4298424803667894e-05, + "loss": 0.6785, + "step": 6280 + }, + { + "epoch": 0.29, + "learning_rate": 1.4289345862272461e-05, + "loss": 0.6316, + "step": 6290 + }, + { + "epoch": 0.29, + "learning_rate": 1.4280266920877027e-05, + "loss": 0.5918, + "step": 6300 + }, + { + "epoch": 0.29, + "learning_rate": 1.4271187979481593e-05, + "loss": 0.6477, + "step": 6310 + }, + { + "epoch": 0.29, + "learning_rate": 1.426210903808616e-05, + "loss": 0.6242, + "step": 6320 + }, + { + "epoch": 0.29, + "learning_rate": 1.4253030096690728e-05, + "loss": 0.7172, + "step": 6330 + }, + { + "epoch": 0.29, + "learning_rate": 1.4243951155295295e-05, + "loss": 0.5865, + "step": 6340 + }, + { + "epoch": 0.29, + "learning_rate": 1.423487221389986e-05, + "loss": 0.5662, + "step": 6350 + }, + { + "epoch": 0.29, + "learning_rate": 1.4225793272504427e-05, + "loss": 0.7059, + "step": 6360 + }, + { + "epoch": 0.29, + "learning_rate": 1.4216714331108995e-05, + "loss": 0.6787, + "step": 6370 + }, + { + "epoch": 0.29, + "learning_rate": 1.420763538971356e-05, + "loss": 0.7768, + "step": 6380 + }, + { + "epoch": 0.29, + "learning_rate": 1.4198556448318128e-05, + "loss": 0.6779, + "step": 6390 + }, + { + "epoch": 0.29, + "learning_rate": 1.4189477506922694e-05, + "loss": 0.7148, + "step": 6400 + }, + { + "epoch": 0.29, + "learning_rate": 1.418039856552726e-05, + "loss": 0.6711, + "step": 6410 + }, + { + "epoch": 0.29, + "learning_rate": 1.4171319624131827e-05, + "loss": 0.5986, + "step": 6420 + }, + { + "epoch": 0.29, + "learning_rate": 1.4162240682736395e-05, + "loss": 0.7102, + "step": 6430 + }, + { + "epoch": 0.29, + "learning_rate": 1.4153161741340962e-05, + "loss": 0.6748, + "step": 6440 + }, + { + "epoch": 0.29, + "learning_rate": 1.4144082799945527e-05, + "loss": 0.7035, + "step": 6450 + }, + { + "epoch": 0.29, + "learning_rate": 1.4135003858550094e-05, + "loss": 0.6783, + "step": 6460 + }, + { + "epoch": 0.29, + "learning_rate": 1.4125924917154662e-05, + "loss": 0.64, + "step": 6470 + }, + { + "epoch": 0.29, + "learning_rate": 1.4116845975759228e-05, + "loss": 0.5697, + "step": 6480 + }, + { + "epoch": 0.29, + "learning_rate": 1.4107767034363795e-05, + "loss": 0.6951, + "step": 6490 + }, + { + "epoch": 0.3, + "learning_rate": 1.4098688092968361e-05, + "loss": 0.6676, + "step": 6500 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.6064703042008691, + "eval_loss": 0.6781559586524963, + "eval_runtime": 74.4693, + "eval_samples_per_second": 55.62, + "eval_steps_per_second": 13.912, + "step": 6500 + }, + { + "epoch": 0.3, + "learning_rate": 1.4089609151572927e-05, + "loss": 0.7078, + "step": 6510 + }, + { + "epoch": 0.3, + "learning_rate": 1.4080530210177494e-05, + "loss": 0.7605, + "step": 6520 + }, + { + "epoch": 0.3, + "learning_rate": 1.4071451268782062e-05, + "loss": 0.7063, + "step": 6530 + }, + { + "epoch": 0.3, + "learning_rate": 1.406237232738663e-05, + "loss": 0.6715, + "step": 6540 + }, + { + "epoch": 0.3, + "learning_rate": 1.4053293385991194e-05, + "loss": 0.7559, + "step": 6550 + }, + { + "epoch": 0.3, + "learning_rate": 1.4044214444595761e-05, + "loss": 0.6426, + "step": 6560 + }, + { + "epoch": 0.3, + "learning_rate": 1.4035135503200329e-05, + "loss": 0.6973, + "step": 6570 + }, + { + "epoch": 0.3, + "learning_rate": 1.4026056561804894e-05, + "loss": 0.6502, + "step": 6580 + }, + { + "epoch": 0.3, + "learning_rate": 1.4016977620409462e-05, + "loss": 0.6734, + "step": 6590 + }, + { + "epoch": 0.3, + "learning_rate": 1.4007898679014026e-05, + "loss": 0.6262, + "step": 6600 + }, + { + "epoch": 0.3, + "learning_rate": 1.3998819737618594e-05, + "loss": 0.7363, + "step": 6610 + }, + { + "epoch": 0.3, + "learning_rate": 1.3989740796223161e-05, + "loss": 0.6805, + "step": 6620 + }, + { + "epoch": 0.3, + "learning_rate": 1.3980661854827729e-05, + "loss": 0.6609, + "step": 6630 + }, + { + "epoch": 0.3, + "learning_rate": 1.3971582913432296e-05, + "loss": 0.6365, + "step": 6640 + }, + { + "epoch": 0.3, + "learning_rate": 1.3962503972036862e-05, + "loss": 0.6311, + "step": 6650 + }, + { + "epoch": 0.3, + "learning_rate": 1.3953425030641428e-05, + "loss": 0.7283, + "step": 6660 + }, + { + "epoch": 0.3, + "learning_rate": 1.3944346089245996e-05, + "loss": 0.6016, + "step": 6670 + }, + { + "epoch": 0.3, + "learning_rate": 1.3935267147850561e-05, + "loss": 0.7059, + "step": 6680 + }, + { + "epoch": 0.3, + "learning_rate": 1.3926188206455129e-05, + "loss": 0.5814, + "step": 6690 + }, + { + "epoch": 0.3, + "learning_rate": 1.3917109265059697e-05, + "loss": 0.6375, + "step": 6700 + }, + { + "epoch": 0.3, + "learning_rate": 1.390803032366426e-05, + "loss": 0.7088, + "step": 6710 + }, + { + "epoch": 0.31, + "learning_rate": 1.3898951382268828e-05, + "loss": 0.6859, + "step": 6720 + }, + { + "epoch": 0.31, + "learning_rate": 1.3889872440873396e-05, + "loss": 0.7002, + "step": 6730 + }, + { + "epoch": 0.31, + "learning_rate": 1.3880793499477963e-05, + "loss": 0.7227, + "step": 6740 + }, + { + "epoch": 0.31, + "learning_rate": 1.3871714558082529e-05, + "loss": 0.6623, + "step": 6750 + }, + { + "epoch": 0.31, + "learning_rate": 1.3862635616687095e-05, + "loss": 0.685, + "step": 6760 + }, + { + "epoch": 0.31, + "learning_rate": 1.3853556675291663e-05, + "loss": 0.6824, + "step": 6770 + }, + { + "epoch": 0.31, + "learning_rate": 1.3844477733896228e-05, + "loss": 0.6699, + "step": 6780 + }, + { + "epoch": 0.31, + "learning_rate": 1.3835398792500796e-05, + "loss": 0.7406, + "step": 6790 + }, + { + "epoch": 0.31, + "learning_rate": 1.3826319851105363e-05, + "loss": 0.6277, + "step": 6800 + }, + { + "epoch": 0.31, + "learning_rate": 1.3817240909709928e-05, + "loss": 0.7102, + "step": 6810 + }, + { + "epoch": 0.31, + "learning_rate": 1.3808161968314495e-05, + "loss": 0.6322, + "step": 6820 + }, + { + "epoch": 0.31, + "learning_rate": 1.3799083026919063e-05, + "loss": 0.7041, + "step": 6830 + }, + { + "epoch": 0.31, + "learning_rate": 1.379000408552363e-05, + "loss": 0.6141, + "step": 6840 + }, + { + "epoch": 0.31, + "learning_rate": 1.3780925144128196e-05, + "loss": 0.6508, + "step": 6850 + }, + { + "epoch": 0.31, + "learning_rate": 1.3771846202732762e-05, + "loss": 0.7113, + "step": 6860 + }, + { + "epoch": 0.31, + "learning_rate": 1.376276726133733e-05, + "loss": 0.7309, + "step": 6870 + }, + { + "epoch": 0.31, + "learning_rate": 1.3753688319941895e-05, + "loss": 0.6883, + "step": 6880 + }, + { + "epoch": 0.31, + "learning_rate": 1.3744609378546463e-05, + "loss": 0.7211, + "step": 6890 + }, + { + "epoch": 0.31, + "learning_rate": 1.373553043715103e-05, + "loss": 0.7285, + "step": 6900 + }, + { + "epoch": 0.31, + "learning_rate": 1.3726451495755595e-05, + "loss": 0.7053, + "step": 6910 + }, + { + "epoch": 0.31, + "learning_rate": 1.3717372554360162e-05, + "loss": 0.652, + "step": 6920 + }, + { + "epoch": 0.31, + "learning_rate": 1.370829361296473e-05, + "loss": 0.6754, + "step": 6930 + }, + { + "epoch": 0.32, + "learning_rate": 1.3699214671569297e-05, + "loss": 0.7719, + "step": 6940 + }, + { + "epoch": 0.32, + "learning_rate": 1.3690135730173863e-05, + "loss": 0.6084, + "step": 6950 + }, + { + "epoch": 0.32, + "learning_rate": 1.3681056788778429e-05, + "loss": 0.6129, + "step": 6960 + }, + { + "epoch": 0.32, + "learning_rate": 1.3671977847382995e-05, + "loss": 0.6258, + "step": 6970 + }, + { + "epoch": 0.32, + "learning_rate": 1.3662898905987562e-05, + "loss": 0.6885, + "step": 6980 + }, + { + "epoch": 0.32, + "learning_rate": 1.365381996459213e-05, + "loss": 0.6445, + "step": 6990 + }, + { + "epoch": 0.32, + "learning_rate": 1.3644741023196697e-05, + "loss": 0.7045, + "step": 7000 + }, + { + "epoch": 0.32, + "eval_accuracy": 0.6038145823273781, + "eval_loss": 0.6830967664718628, + "eval_runtime": 72.7514, + "eval_samples_per_second": 56.934, + "eval_steps_per_second": 14.24, + "step": 7000 + }, + { + "epoch": 0.32, + "learning_rate": 1.3635662081801265e-05, + "loss": 0.6984, + "step": 7010 + }, + { + "epoch": 0.32, + "learning_rate": 1.3626583140405829e-05, + "loss": 0.6678, + "step": 7020 + }, + { + "epoch": 0.32, + "learning_rate": 1.3617504199010397e-05, + "loss": 0.7355, + "step": 7030 + }, + { + "epoch": 0.32, + "learning_rate": 1.3608425257614964e-05, + "loss": 0.6607, + "step": 7040 + }, + { + "epoch": 0.32, + "learning_rate": 1.359934631621953e-05, + "loss": 0.6746, + "step": 7050 + }, + { + "epoch": 0.32, + "learning_rate": 1.3590267374824098e-05, + "loss": 0.5451, + "step": 7060 + }, + { + "epoch": 0.32, + "learning_rate": 1.3581188433428662e-05, + "loss": 0.6838, + "step": 7070 + }, + { + "epoch": 0.32, + "learning_rate": 1.357210949203323e-05, + "loss": 0.6322, + "step": 7080 + }, + { + "epoch": 0.32, + "learning_rate": 1.3563030550637797e-05, + "loss": 0.5895, + "step": 7090 + }, + { + "epoch": 0.32, + "learning_rate": 1.3553951609242364e-05, + "loss": 0.6818, + "step": 7100 + }, + { + "epoch": 0.32, + "learning_rate": 1.3544872667846932e-05, + "loss": 0.777, + "step": 7110 + }, + { + "epoch": 0.32, + "learning_rate": 1.3535793726451496e-05, + "loss": 0.7098, + "step": 7120 + }, + { + "epoch": 0.32, + "learning_rate": 1.3526714785056064e-05, + "loss": 0.7652, + "step": 7130 + }, + { + "epoch": 0.32, + "learning_rate": 1.3517635843660631e-05, + "loss": 0.6576, + "step": 7140 + }, + { + "epoch": 0.32, + "learning_rate": 1.3508556902265197e-05, + "loss": 0.6236, + "step": 7150 + }, + { + "epoch": 0.33, + "learning_rate": 1.3499477960869765e-05, + "loss": 0.6613, + "step": 7160 + }, + { + "epoch": 0.33, + "learning_rate": 1.3490399019474329e-05, + "loss": 0.6992, + "step": 7170 + }, + { + "epoch": 0.33, + "learning_rate": 1.3481320078078896e-05, + "loss": 0.5818, + "step": 7180 + }, + { + "epoch": 0.33, + "learning_rate": 1.3472241136683464e-05, + "loss": 0.6623, + "step": 7190 + }, + { + "epoch": 0.33, + "learning_rate": 1.3463162195288031e-05, + "loss": 0.6654, + "step": 7200 + }, + { + "epoch": 0.33, + "learning_rate": 1.3454083253892599e-05, + "loss": 0.7695, + "step": 7210 + }, + { + "epoch": 0.33, + "learning_rate": 1.3445004312497163e-05, + "loss": 0.6055, + "step": 7220 + }, + { + "epoch": 0.33, + "learning_rate": 1.343592537110173e-05, + "loss": 0.7094, + "step": 7230 + }, + { + "epoch": 0.33, + "learning_rate": 1.3426846429706298e-05, + "loss": 0.684, + "step": 7240 + }, + { + "epoch": 0.33, + "learning_rate": 1.3417767488310864e-05, + "loss": 0.7111, + "step": 7250 + }, + { + "epoch": 0.33, + "learning_rate": 1.3408688546915431e-05, + "loss": 0.7848, + "step": 7260 + }, + { + "epoch": 0.33, + "learning_rate": 1.3399609605519996e-05, + "loss": 0.7582, + "step": 7270 + }, + { + "epoch": 0.33, + "learning_rate": 1.3390530664124563e-05, + "loss": 0.7223, + "step": 7280 + }, + { + "epoch": 0.33, + "learning_rate": 1.338145172272913e-05, + "loss": 0.6324, + "step": 7290 + }, + { + "epoch": 0.33, + "learning_rate": 1.3372372781333698e-05, + "loss": 0.673, + "step": 7300 + }, + { + "epoch": 0.33, + "learning_rate": 1.3363293839938266e-05, + "loss": 0.6451, + "step": 7310 + }, + { + "epoch": 0.33, + "learning_rate": 1.335421489854283e-05, + "loss": 0.6449, + "step": 7320 + }, + { + "epoch": 0.33, + "learning_rate": 1.3345135957147398e-05, + "loss": 0.7188, + "step": 7330 + }, + { + "epoch": 0.33, + "learning_rate": 1.3336057015751963e-05, + "loss": 0.7484, + "step": 7340 + }, + { + "epoch": 0.33, + "learning_rate": 1.3326978074356531e-05, + "loss": 0.6664, + "step": 7350 + }, + { + "epoch": 0.33, + "learning_rate": 1.3317899132961098e-05, + "loss": 0.7406, + "step": 7360 + }, + { + "epoch": 0.33, + "learning_rate": 1.3308820191565666e-05, + "loss": 0.7078, + "step": 7370 + }, + { + "epoch": 0.34, + "learning_rate": 1.329974125017023e-05, + "loss": 0.5885, + "step": 7380 + }, + { + "epoch": 0.34, + "learning_rate": 1.3290662308774798e-05, + "loss": 0.7395, + "step": 7390 + }, + { + "epoch": 0.34, + "learning_rate": 1.3281583367379365e-05, + "loss": 0.6184, + "step": 7400 + }, + { + "epoch": 0.34, + "learning_rate": 1.3272504425983933e-05, + "loss": 0.7012, + "step": 7410 + }, + { + "epoch": 0.34, + "learning_rate": 1.3263425484588499e-05, + "loss": 0.7332, + "step": 7420 + }, + { + "epoch": 0.34, + "learning_rate": 1.3254346543193064e-05, + "loss": 0.5527, + "step": 7430 + }, + { + "epoch": 0.34, + "learning_rate": 1.324526760179763e-05, + "loss": 0.6139, + "step": 7440 + }, + { + "epoch": 0.34, + "learning_rate": 1.3236188660402198e-05, + "loss": 0.6439, + "step": 7450 + }, + { + "epoch": 0.34, + "learning_rate": 1.3227109719006765e-05, + "loss": 0.6129, + "step": 7460 + }, + { + "epoch": 0.34, + "learning_rate": 1.3218030777611333e-05, + "loss": 0.6891, + "step": 7470 + }, + { + "epoch": 0.34, + "learning_rate": 1.3208951836215897e-05, + "loss": 0.7117, + "step": 7480 + }, + { + "epoch": 0.34, + "learning_rate": 1.3199872894820465e-05, + "loss": 0.7484, + "step": 7490 + }, + { + "epoch": 0.34, + "learning_rate": 1.3190793953425032e-05, + "loss": 0.6268, + "step": 7500 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.6071945919845485, + "eval_loss": 0.6775137186050415, + "eval_runtime": 73.7281, + "eval_samples_per_second": 56.179, + "eval_steps_per_second": 14.052, + "step": 7500 + }, + { + "epoch": 0.34, + "learning_rate": 1.31817150120296e-05, + "loss": 0.7041, + "step": 7510 + }, + { + "epoch": 0.34, + "learning_rate": 1.3172636070634166e-05, + "loss": 0.7602, + "step": 7520 + }, + { + "epoch": 0.34, + "learning_rate": 1.3163557129238731e-05, + "loss": 0.6477, + "step": 7530 + }, + { + "epoch": 0.34, + "learning_rate": 1.3154478187843297e-05, + "loss": 0.6014, + "step": 7540 + }, + { + "epoch": 0.34, + "learning_rate": 1.3145399246447865e-05, + "loss": 0.6334, + "step": 7550 + }, + { + "epoch": 0.34, + "learning_rate": 1.3136320305052432e-05, + "loss": 0.7918, + "step": 7560 + }, + { + "epoch": 0.34, + "learning_rate": 1.3127241363657e-05, + "loss": 0.6686, + "step": 7570 + }, + { + "epoch": 0.34, + "learning_rate": 1.3118162422261564e-05, + "loss": 0.6744, + "step": 7580 + }, + { + "epoch": 0.34, + "learning_rate": 1.3109083480866132e-05, + "loss": 0.6256, + "step": 7590 + }, + { + "epoch": 0.34, + "learning_rate": 1.31000045394707e-05, + "loss": 0.675, + "step": 7600 + }, + { + "epoch": 0.35, + "learning_rate": 1.3090925598075267e-05, + "loss": 0.7121, + "step": 7610 + }, + { + "epoch": 0.35, + "learning_rate": 1.3081846656679833e-05, + "loss": 0.6578, + "step": 7620 + }, + { + "epoch": 0.35, + "learning_rate": 1.3072767715284398e-05, + "loss": 0.6576, + "step": 7630 + }, + { + "epoch": 0.35, + "learning_rate": 1.3063688773888964e-05, + "loss": 0.6482, + "step": 7640 + }, + { + "epoch": 0.35, + "learning_rate": 1.3054609832493532e-05, + "loss": 0.5342, + "step": 7650 + }, + { + "epoch": 0.35, + "learning_rate": 1.30455308910981e-05, + "loss": 0.6906, + "step": 7660 + }, + { + "epoch": 0.35, + "learning_rate": 1.3036451949702667e-05, + "loss": 0.6277, + "step": 7670 + }, + { + "epoch": 0.35, + "learning_rate": 1.3027373008307231e-05, + "loss": 0.5578, + "step": 7680 + }, + { + "epoch": 0.35, + "learning_rate": 1.3018294066911799e-05, + "loss": 0.5428, + "step": 7690 + }, + { + "epoch": 0.35, + "learning_rate": 1.3009215125516366e-05, + "loss": 0.6779, + "step": 7700 + }, + { + "epoch": 0.35, + "learning_rate": 1.3000136184120932e-05, + "loss": 0.7418, + "step": 7710 + }, + { + "epoch": 0.35, + "learning_rate": 1.29910572427255e-05, + "loss": 0.6016, + "step": 7720 + }, + { + "epoch": 0.35, + "learning_rate": 1.2981978301330065e-05, + "loss": 0.7863, + "step": 7730 + }, + { + "epoch": 0.35, + "learning_rate": 1.2972899359934631e-05, + "loss": 0.6191, + "step": 7740 + }, + { + "epoch": 0.35, + "learning_rate": 1.2963820418539199e-05, + "loss": 0.6687, + "step": 7750 + }, + { + "epoch": 0.35, + "learning_rate": 1.2954741477143766e-05, + "loss": 0.6523, + "step": 7760 + }, + { + "epoch": 0.35, + "learning_rate": 1.2945662535748334e-05, + "loss": 0.7125, + "step": 7770 + }, + { + "epoch": 0.35, + "learning_rate": 1.2936583594352901e-05, + "loss": 0.5959, + "step": 7780 + }, + { + "epoch": 0.35, + "learning_rate": 1.2927504652957466e-05, + "loss": 0.6693, + "step": 7790 + }, + { + "epoch": 0.35, + "learning_rate": 1.2918425711562033e-05, + "loss": 0.6715, + "step": 7800 + }, + { + "epoch": 0.35, + "learning_rate": 1.2909346770166599e-05, + "loss": 0.7223, + "step": 7810 + }, + { + "epoch": 0.35, + "learning_rate": 1.2900267828771166e-05, + "loss": 0.6242, + "step": 7820 + }, + { + "epoch": 0.36, + "learning_rate": 1.2891188887375734e-05, + "loss": 0.7363, + "step": 7830 + }, + { + "epoch": 0.36, + "learning_rate": 1.2882109945980298e-05, + "loss": 0.6469, + "step": 7840 + }, + { + "epoch": 0.36, + "learning_rate": 1.2873031004584866e-05, + "loss": 0.632, + "step": 7850 + }, + { + "epoch": 0.36, + "learning_rate": 1.2863952063189433e-05, + "loss": 0.69, + "step": 7860 + }, + { + "epoch": 0.36, + "learning_rate": 1.2854873121794e-05, + "loss": 0.7389, + "step": 7870 + }, + { + "epoch": 0.36, + "learning_rate": 1.2845794180398568e-05, + "loss": 0.693, + "step": 7880 + }, + { + "epoch": 0.36, + "learning_rate": 1.2836715239003133e-05, + "loss": 0.6895, + "step": 7890 + }, + { + "epoch": 0.36, + "learning_rate": 1.28276362976077e-05, + "loss": 0.6652, + "step": 7900 + }, + { + "epoch": 0.36, + "learning_rate": 1.2818557356212266e-05, + "loss": 0.6574, + "step": 7910 + }, + { + "epoch": 0.36, + "learning_rate": 1.2809478414816833e-05, + "loss": 0.7434, + "step": 7920 + }, + { + "epoch": 0.36, + "learning_rate": 1.2800399473421401e-05, + "loss": 0.6785, + "step": 7930 + }, + { + "epoch": 0.36, + "learning_rate": 1.2791320532025965e-05, + "loss": 0.6727, + "step": 7940 + }, + { + "epoch": 0.36, + "learning_rate": 1.2782241590630533e-05, + "loss": 0.6723, + "step": 7950 + }, + { + "epoch": 0.36, + "learning_rate": 1.27731626492351e-05, + "loss": 0.7227, + "step": 7960 + }, + { + "epoch": 0.36, + "learning_rate": 1.2764083707839668e-05, + "loss": 0.6693, + "step": 7970 + }, + { + "epoch": 0.36, + "learning_rate": 1.2755004766444235e-05, + "loss": 0.5281, + "step": 7980 + }, + { + "epoch": 0.36, + "learning_rate": 1.27459258250488e-05, + "loss": 0.7488, + "step": 7990 + }, + { + "epoch": 0.36, + "learning_rate": 1.2736846883653367e-05, + "loss": 0.5973, + "step": 8000 + }, + { + "epoch": 0.36, + "eval_accuracy": 0.6120231772090777, + "eval_loss": 0.6748174428939819, + "eval_runtime": 73.8533, + "eval_samples_per_second": 56.084, + "eval_steps_per_second": 14.028, + "step": 8000 + }, + { + "epoch": 0.36, + "learning_rate": 1.2727767942257933e-05, + "loss": 0.6354, + "step": 8010 + }, + { + "epoch": 0.36, + "learning_rate": 1.27186890008625e-05, + "loss": 0.6363, + "step": 8020 + }, + { + "epoch": 0.36, + "learning_rate": 1.2709610059467068e-05, + "loss": 0.7402, + "step": 8030 + }, + { + "epoch": 0.36, + "learning_rate": 1.2700531118071632e-05, + "loss": 0.675, + "step": 8040 + }, + { + "epoch": 0.37, + "learning_rate": 1.26914521766762e-05, + "loss": 0.7207, + "step": 8050 + }, + { + "epoch": 0.37, + "learning_rate": 1.2682373235280767e-05, + "loss": 0.6342, + "step": 8060 + }, + { + "epoch": 0.37, + "learning_rate": 1.2673294293885335e-05, + "loss": 0.6756, + "step": 8070 + }, + { + "epoch": 0.37, + "learning_rate": 1.26642153524899e-05, + "loss": 0.7234, + "step": 8080 + }, + { + "epoch": 0.37, + "learning_rate": 1.2655136411094466e-05, + "loss": 0.6652, + "step": 8090 + }, + { + "epoch": 0.37, + "learning_rate": 1.2646057469699034e-05, + "loss": 0.6008, + "step": 8100 + }, + { + "epoch": 0.37, + "learning_rate": 1.26369785283036e-05, + "loss": 0.7371, + "step": 8110 + }, + { + "epoch": 0.37, + "learning_rate": 1.2627899586908167e-05, + "loss": 0.6217, + "step": 8120 + }, + { + "epoch": 0.37, + "learning_rate": 1.2618820645512735e-05, + "loss": 0.6926, + "step": 8130 + }, + { + "epoch": 0.37, + "learning_rate": 1.2609741704117302e-05, + "loss": 0.6658, + "step": 8140 + }, + { + "epoch": 0.37, + "learning_rate": 1.2600662762721867e-05, + "loss": 0.6504, + "step": 8150 + }, + { + "epoch": 0.37, + "learning_rate": 1.2591583821326434e-05, + "loss": 0.8324, + "step": 8160 + }, + { + "epoch": 0.37, + "learning_rate": 1.2582504879931002e-05, + "loss": 0.6648, + "step": 8170 + }, + { + "epoch": 0.37, + "learning_rate": 1.2573425938535568e-05, + "loss": 0.6914, + "step": 8180 + }, + { + "epoch": 0.37, + "learning_rate": 1.2564346997140135e-05, + "loss": 0.6416, + "step": 8190 + }, + { + "epoch": 0.37, + "learning_rate": 1.2555268055744701e-05, + "loss": 0.5684, + "step": 8200 + }, + { + "epoch": 0.37, + "learning_rate": 1.2546189114349267e-05, + "loss": 0.6676, + "step": 8210 + }, + { + "epoch": 0.37, + "learning_rate": 1.2537110172953834e-05, + "loss": 0.6582, + "step": 8220 + }, + { + "epoch": 0.37, + "learning_rate": 1.2528031231558402e-05, + "loss": 0.7184, + "step": 8230 + }, + { + "epoch": 0.37, + "learning_rate": 1.251895229016297e-05, + "loss": 0.6281, + "step": 8240 + }, + { + "epoch": 0.37, + "learning_rate": 1.2509873348767534e-05, + "loss": 0.7473, + "step": 8250 + }, + { + "epoch": 0.37, + "learning_rate": 1.2500794407372101e-05, + "loss": 0.6387, + "step": 8260 + }, + { + "epoch": 0.38, + "learning_rate": 1.2491715465976669e-05, + "loss": 0.7762, + "step": 8270 + }, + { + "epoch": 0.38, + "learning_rate": 1.2482636524581235e-05, + "loss": 0.6533, + "step": 8280 + }, + { + "epoch": 0.38, + "learning_rate": 1.2473557583185802e-05, + "loss": 0.7139, + "step": 8290 + }, + { + "epoch": 0.38, + "learning_rate": 1.2464478641790368e-05, + "loss": 0.6283, + "step": 8300 + }, + { + "epoch": 0.38, + "learning_rate": 1.2455399700394934e-05, + "loss": 0.5828, + "step": 8310 + }, + { + "epoch": 0.38, + "learning_rate": 1.2446320758999501e-05, + "loss": 0.6191, + "step": 8320 + }, + { + "epoch": 0.38, + "learning_rate": 1.2437241817604069e-05, + "loss": 0.6521, + "step": 8330 + }, + { + "epoch": 0.38, + "learning_rate": 1.2428162876208636e-05, + "loss": 0.7365, + "step": 8340 + }, + { + "epoch": 0.38, + "learning_rate": 1.24190839348132e-05, + "loss": 0.6521, + "step": 8350 + }, + { + "epoch": 0.38, + "learning_rate": 1.2410004993417768e-05, + "loss": 0.7143, + "step": 8360 + }, + { + "epoch": 0.38, + "learning_rate": 1.2400926052022336e-05, + "loss": 0.5857, + "step": 8370 + }, + { + "epoch": 0.38, + "learning_rate": 1.2391847110626901e-05, + "loss": 0.6746, + "step": 8380 + }, + { + "epoch": 0.38, + "learning_rate": 1.2382768169231469e-05, + "loss": 0.7582, + "step": 8390 + }, + { + "epoch": 0.38, + "learning_rate": 1.2373689227836035e-05, + "loss": 0.6582, + "step": 8400 + }, + { + "epoch": 0.38, + "learning_rate": 1.23646102864406e-05, + "loss": 0.6844, + "step": 8410 + }, + { + "epoch": 0.38, + "learning_rate": 1.2355531345045168e-05, + "loss": 0.6281, + "step": 8420 + }, + { + "epoch": 0.38, + "learning_rate": 1.2346452403649736e-05, + "loss": 0.6521, + "step": 8430 + }, + { + "epoch": 0.38, + "learning_rate": 1.2337373462254303e-05, + "loss": 0.6479, + "step": 8440 + }, + { + "epoch": 0.38, + "learning_rate": 1.2328294520858867e-05, + "loss": 0.7461, + "step": 8450 + }, + { + "epoch": 0.38, + "learning_rate": 1.2319215579463435e-05, + "loss": 0.6861, + "step": 8460 + }, + { + "epoch": 0.38, + "learning_rate": 1.2310136638068003e-05, + "loss": 0.6818, + "step": 8470 + }, + { + "epoch": 0.38, + "learning_rate": 1.2301057696672568e-05, + "loss": 0.6973, + "step": 8480 + }, + { + "epoch": 0.39, + "learning_rate": 1.2291978755277136e-05, + "loss": 0.6914, + "step": 8490 + }, + { + "epoch": 0.39, + "learning_rate": 1.2282899813881704e-05, + "loss": 0.6592, + "step": 8500 + }, + { + "epoch": 0.39, + "eval_accuracy": 0.613713182037663, + "eval_loss": 0.6704452633857727, + "eval_runtime": 73.8233, + "eval_samples_per_second": 56.107, + "eval_steps_per_second": 14.034, + "step": 8500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2273820872486268e-05, + "loss": 0.6002, + "step": 8510 + }, + { + "epoch": 0.39, + "learning_rate": 1.2264741931090835e-05, + "loss": 0.5867, + "step": 8520 + }, + { + "epoch": 0.39, + "learning_rate": 1.2255662989695403e-05, + "loss": 0.6746, + "step": 8530 + }, + { + "epoch": 0.39, + "learning_rate": 1.224658404829997e-05, + "loss": 0.7316, + "step": 8540 + }, + { + "epoch": 0.39, + "learning_rate": 1.2237505106904536e-05, + "loss": 0.7844, + "step": 8550 + }, + { + "epoch": 0.39, + "learning_rate": 1.2228426165509102e-05, + "loss": 0.7021, + "step": 8560 + }, + { + "epoch": 0.39, + "learning_rate": 1.221934722411367e-05, + "loss": 0.6369, + "step": 8570 + }, + { + "epoch": 0.39, + "learning_rate": 1.2210268282718235e-05, + "loss": 0.6469, + "step": 8580 + }, + { + "epoch": 0.39, + "learning_rate": 1.2201189341322803e-05, + "loss": 0.7189, + "step": 8590 + }, + { + "epoch": 0.39, + "learning_rate": 1.219211039992737e-05, + "loss": 0.668, + "step": 8600 + }, + { + "epoch": 0.39, + "learning_rate": 1.2183031458531935e-05, + "loss": 0.6777, + "step": 8610 + }, + { + "epoch": 0.39, + "learning_rate": 1.2173952517136502e-05, + "loss": 0.5765, + "step": 8620 + }, + { + "epoch": 0.39, + "learning_rate": 1.216487357574107e-05, + "loss": 0.6963, + "step": 8630 + }, + { + "epoch": 0.39, + "learning_rate": 1.2155794634345637e-05, + "loss": 0.6502, + "step": 8640 + }, + { + "epoch": 0.39, + "learning_rate": 1.2146715692950203e-05, + "loss": 0.7574, + "step": 8650 + }, + { + "epoch": 0.39, + "learning_rate": 1.2137636751554769e-05, + "loss": 0.7723, + "step": 8660 + }, + { + "epoch": 0.39, + "learning_rate": 1.2128557810159336e-05, + "loss": 0.6846, + "step": 8670 + }, + { + "epoch": 0.39, + "learning_rate": 1.2119478868763902e-05, + "loss": 0.6258, + "step": 8680 + }, + { + "epoch": 0.39, + "learning_rate": 1.211039992736847e-05, + "loss": 0.6141, + "step": 8690 + }, + { + "epoch": 0.39, + "learning_rate": 1.2101320985973037e-05, + "loss": 0.576, + "step": 8700 + }, + { + "epoch": 0.4, + "learning_rate": 1.2092242044577602e-05, + "loss": 0.7633, + "step": 8710 + }, + { + "epoch": 0.4, + "learning_rate": 1.2083163103182169e-05, + "loss": 0.6477, + "step": 8720 + }, + { + "epoch": 0.4, + "learning_rate": 1.2074084161786737e-05, + "loss": 0.7104, + "step": 8730 + }, + { + "epoch": 0.4, + "learning_rate": 1.2065005220391304e-05, + "loss": 0.673, + "step": 8740 + }, + { + "epoch": 0.4, + "learning_rate": 1.205592627899587e-05, + "loss": 0.6516, + "step": 8750 + }, + { + "epoch": 0.4, + "learning_rate": 1.2046847337600436e-05, + "loss": 0.7199, + "step": 8760 + }, + { + "epoch": 0.4, + "learning_rate": 1.2037768396205003e-05, + "loss": 0.7305, + "step": 8770 + }, + { + "epoch": 0.4, + "learning_rate": 1.202868945480957e-05, + "loss": 0.6078, + "step": 8780 + }, + { + "epoch": 0.4, + "learning_rate": 1.2019610513414137e-05, + "loss": 0.6326, + "step": 8790 + }, + { + "epoch": 0.4, + "learning_rate": 1.2010531572018704e-05, + "loss": 0.6406, + "step": 8800 + }, + { + "epoch": 0.4, + "learning_rate": 1.2001452630623269e-05, + "loss": 0.7246, + "step": 8810 + }, + { + "epoch": 0.4, + "learning_rate": 1.1992373689227836e-05, + "loss": 0.7004, + "step": 8820 + }, + { + "epoch": 0.4, + "learning_rate": 1.1983294747832404e-05, + "loss": 0.6844, + "step": 8830 + }, + { + "epoch": 0.4, + "learning_rate": 1.1974215806436971e-05, + "loss": 0.7195, + "step": 8840 + }, + { + "epoch": 0.4, + "learning_rate": 1.1965136865041537e-05, + "loss": 0.6477, + "step": 8850 + }, + { + "epoch": 0.4, + "learning_rate": 1.1956057923646103e-05, + "loss": 0.7027, + "step": 8860 + }, + { + "epoch": 0.4, + "learning_rate": 1.194697898225067e-05, + "loss": 0.6791, + "step": 8870 + }, + { + "epoch": 0.4, + "learning_rate": 1.1937900040855236e-05, + "loss": 0.5969, + "step": 8880 + }, + { + "epoch": 0.4, + "learning_rate": 1.1928821099459804e-05, + "loss": 0.674, + "step": 8890 + }, + { + "epoch": 0.4, + "learning_rate": 1.1919742158064371e-05, + "loss": 0.6832, + "step": 8900 + }, + { + "epoch": 0.4, + "learning_rate": 1.1910663216668939e-05, + "loss": 0.6639, + "step": 8910 + }, + { + "epoch": 0.4, + "learning_rate": 1.1901584275273503e-05, + "loss": 0.6672, + "step": 8920 + }, + { + "epoch": 0.41, + "learning_rate": 1.189250533387807e-05, + "loss": 0.6502, + "step": 8930 + }, + { + "epoch": 0.41, + "learning_rate": 1.1883426392482638e-05, + "loss": 0.6389, + "step": 8940 + }, + { + "epoch": 0.41, + "learning_rate": 1.1874347451087204e-05, + "loss": 0.6297, + "step": 8950 + }, + { + "epoch": 0.41, + "learning_rate": 1.1865268509691772e-05, + "loss": 0.6564, + "step": 8960 + }, + { + "epoch": 0.41, + "learning_rate": 1.1856189568296337e-05, + "loss": 0.66, + "step": 8970 + }, + { + "epoch": 0.41, + "learning_rate": 1.1847110626900903e-05, + "loss": 0.8164, + "step": 8980 + }, + { + "epoch": 0.41, + "learning_rate": 1.183803168550547e-05, + "loss": 0.6418, + "step": 8990 + }, + { + "epoch": 0.41, + "learning_rate": 1.1828952744110038e-05, + "loss": 0.6805, + "step": 9000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.6139546112988894, + "eval_loss": 0.6687760353088379, + "eval_runtime": 74.2738, + "eval_samples_per_second": 55.767, + "eval_steps_per_second": 13.948, + "step": 9000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1819873802714606e-05, + "loss": 0.6879, + "step": 9010 + }, + { + "epoch": 0.41, + "learning_rate": 1.181079486131917e-05, + "loss": 0.7094, + "step": 9020 + }, + { + "epoch": 0.41, + "learning_rate": 1.1801715919923738e-05, + "loss": 0.6699, + "step": 9030 + }, + { + "epoch": 0.41, + "learning_rate": 1.1792636978528305e-05, + "loss": 0.7035, + "step": 9040 + }, + { + "epoch": 0.41, + "learning_rate": 1.1783558037132871e-05, + "loss": 0.7768, + "step": 9050 + }, + { + "epoch": 0.41, + "learning_rate": 1.1774479095737438e-05, + "loss": 0.6312, + "step": 9060 + }, + { + "epoch": 0.41, + "learning_rate": 1.1765400154342004e-05, + "loss": 0.658, + "step": 9070 + }, + { + "epoch": 0.41, + "learning_rate": 1.175632121294657e-05, + "loss": 0.6699, + "step": 9080 + }, + { + "epoch": 0.41, + "learning_rate": 1.1747242271551138e-05, + "loss": 0.7354, + "step": 9090 + }, + { + "epoch": 0.41, + "learning_rate": 1.1738163330155705e-05, + "loss": 0.6662, + "step": 9100 + }, + { + "epoch": 0.41, + "learning_rate": 1.1729084388760273e-05, + "loss": 0.6908, + "step": 9110 + }, + { + "epoch": 0.41, + "learning_rate": 1.1720005447364837e-05, + "loss": 0.7621, + "step": 9120 + }, + { + "epoch": 0.41, + "learning_rate": 1.1710926505969405e-05, + "loss": 0.7016, + "step": 9130 + }, + { + "epoch": 0.41, + "learning_rate": 1.1701847564573972e-05, + "loss": 0.6984, + "step": 9140 + }, + { + "epoch": 0.42, + "learning_rate": 1.1692768623178538e-05, + "loss": 0.6508, + "step": 9150 + }, + { + "epoch": 0.42, + "learning_rate": 1.1683689681783105e-05, + "loss": 0.6754, + "step": 9160 + }, + { + "epoch": 0.42, + "learning_rate": 1.1674610740387671e-05, + "loss": 0.6082, + "step": 9170 + }, + { + "epoch": 0.42, + "learning_rate": 1.1665531798992237e-05, + "loss": 0.651, + "step": 9180 + }, + { + "epoch": 0.42, + "learning_rate": 1.1656452857596805e-05, + "loss": 0.7121, + "step": 9190 + }, + { + "epoch": 0.42, + "learning_rate": 1.1647373916201372e-05, + "loss": 0.793, + "step": 9200 + }, + { + "epoch": 0.42, + "learning_rate": 1.163829497480594e-05, + "loss": 0.5602, + "step": 9210 + }, + { + "epoch": 0.42, + "learning_rate": 1.1629216033410504e-05, + "loss": 0.6713, + "step": 9220 + }, + { + "epoch": 0.42, + "learning_rate": 1.1620137092015071e-05, + "loss": 0.7727, + "step": 9230 + }, + { + "epoch": 0.42, + "learning_rate": 1.1611058150619639e-05, + "loss": 0.6102, + "step": 9240 + }, + { + "epoch": 0.42, + "learning_rate": 1.1601979209224205e-05, + "loss": 0.6553, + "step": 9250 + }, + { + "epoch": 0.42, + "learning_rate": 1.1592900267828772e-05, + "loss": 0.6861, + "step": 9260 + }, + { + "epoch": 0.42, + "learning_rate": 1.158382132643334e-05, + "loss": 0.6576, + "step": 9270 + }, + { + "epoch": 0.42, + "learning_rate": 1.1574742385037904e-05, + "loss": 0.6504, + "step": 9280 + }, + { + "epoch": 0.42, + "learning_rate": 1.1565663443642472e-05, + "loss": 0.6322, + "step": 9290 + }, + { + "epoch": 0.42, + "learning_rate": 1.155658450224704e-05, + "loss": 0.6512, + "step": 9300 + }, + { + "epoch": 0.42, + "learning_rate": 1.1547505560851607e-05, + "loss": 0.6479, + "step": 9310 + }, + { + "epoch": 0.42, + "learning_rate": 1.1538426619456173e-05, + "loss": 0.6904, + "step": 9320 + }, + { + "epoch": 0.42, + "learning_rate": 1.1529347678060738e-05, + "loss": 0.7137, + "step": 9330 + }, + { + "epoch": 0.42, + "learning_rate": 1.1520268736665306e-05, + "loss": 0.6318, + "step": 9340 + }, + { + "epoch": 0.42, + "learning_rate": 1.1511189795269872e-05, + "loss": 0.7457, + "step": 9350 + }, + { + "epoch": 0.42, + "learning_rate": 1.150211085387444e-05, + "loss": 0.6027, + "step": 9360 + }, + { + "epoch": 0.43, + "learning_rate": 1.1493031912479007e-05, + "loss": 0.6516, + "step": 9370 + }, + { + "epoch": 0.43, + "learning_rate": 1.1483952971083571e-05, + "loss": 0.6967, + "step": 9380 + }, + { + "epoch": 0.43, + "learning_rate": 1.1474874029688139e-05, + "loss": 0.7232, + "step": 9390 + }, + { + "epoch": 0.43, + "learning_rate": 1.1465795088292706e-05, + "loss": 0.7355, + "step": 9400 + }, + { + "epoch": 0.43, + "learning_rate": 1.1456716146897274e-05, + "loss": 0.8191, + "step": 9410 + }, + { + "epoch": 0.43, + "learning_rate": 1.144763720550184e-05, + "loss": 0.6443, + "step": 9420 + }, + { + "epoch": 0.43, + "learning_rate": 1.1438558264106405e-05, + "loss": 0.6482, + "step": 9430 + }, + { + "epoch": 0.43, + "learning_rate": 1.1429479322710973e-05, + "loss": 0.7633, + "step": 9440 + }, + { + "epoch": 0.43, + "learning_rate": 1.1420400381315539e-05, + "loss": 0.6391, + "step": 9450 + }, + { + "epoch": 0.43, + "learning_rate": 1.1411321439920106e-05, + "loss": 0.7008, + "step": 9460 + }, + { + "epoch": 0.43, + "learning_rate": 1.1402242498524674e-05, + "loss": 0.6922, + "step": 9470 + }, + { + "epoch": 0.43, + "learning_rate": 1.1393163557129238e-05, + "loss": 0.5777, + "step": 9480 + }, + { + "epoch": 0.43, + "learning_rate": 1.1384084615733806e-05, + "loss": 0.6977, + "step": 9490 + }, + { + "epoch": 0.43, + "learning_rate": 1.1375005674338373e-05, + "loss": 0.5785, + "step": 9500 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.6129888942539836, + "eval_loss": 0.6736442446708679, + "eval_runtime": 72.7311, + "eval_samples_per_second": 56.949, + "eval_steps_per_second": 14.244, + "step": 9500 + }, + { + "epoch": 0.43, + "learning_rate": 1.136592673294294e-05, + "loss": 0.5643, + "step": 9510 + }, + { + "epoch": 0.43, + "learning_rate": 1.1356847791547507e-05, + "loss": 0.6428, + "step": 9520 + }, + { + "epoch": 0.43, + "learning_rate": 1.1347768850152072e-05, + "loss": 0.6371, + "step": 9530 + }, + { + "epoch": 0.43, + "learning_rate": 1.133868990875664e-05, + "loss": 0.633, + "step": 9540 + }, + { + "epoch": 0.43, + "learning_rate": 1.1329610967361206e-05, + "loss": 0.7713, + "step": 9550 + }, + { + "epoch": 0.43, + "learning_rate": 1.1320532025965773e-05, + "loss": 0.7596, + "step": 9560 + }, + { + "epoch": 0.43, + "learning_rate": 1.131145308457034e-05, + "loss": 0.635, + "step": 9570 + }, + { + "epoch": 0.43, + "learning_rate": 1.1302374143174905e-05, + "loss": 0.6986, + "step": 9580 + }, + { + "epoch": 0.44, + "learning_rate": 1.1293295201779473e-05, + "loss": 0.6287, + "step": 9590 + }, + { + "epoch": 0.44, + "learning_rate": 1.128421626038404e-05, + "loss": 0.6723, + "step": 9600 + }, + { + "epoch": 0.44, + "learning_rate": 1.1275137318988608e-05, + "loss": 0.5947, + "step": 9610 + }, + { + "epoch": 0.44, + "learning_rate": 1.1266058377593173e-05, + "loss": 0.757, + "step": 9620 + }, + { + "epoch": 0.44, + "learning_rate": 1.1256979436197741e-05, + "loss": 0.5848, + "step": 9630 + }, + { + "epoch": 0.44, + "learning_rate": 1.1247900494802307e-05, + "loss": 0.7131, + "step": 9640 + }, + { + "epoch": 0.44, + "learning_rate": 1.1238821553406873e-05, + "loss": 0.6102, + "step": 9650 + }, + { + "epoch": 0.44, + "learning_rate": 1.122974261201144e-05, + "loss": 0.7449, + "step": 9660 + }, + { + "epoch": 0.44, + "learning_rate": 1.1220663670616008e-05, + "loss": 0.6512, + "step": 9670 + }, + { + "epoch": 0.44, + "learning_rate": 1.1211584729220575e-05, + "loss": 0.6586, + "step": 9680 + }, + { + "epoch": 0.44, + "learning_rate": 1.120250578782514e-05, + "loss": 0.6793, + "step": 9690 + }, + { + "epoch": 0.44, + "learning_rate": 1.1193426846429707e-05, + "loss": 0.6764, + "step": 9700 + }, + { + "epoch": 0.44, + "learning_rate": 1.1184347905034275e-05, + "loss": 0.7092, + "step": 9710 + }, + { + "epoch": 0.44, + "learning_rate": 1.117526896363884e-05, + "loss": 0.6264, + "step": 9720 + }, + { + "epoch": 0.44, + "learning_rate": 1.1166190022243408e-05, + "loss": 0.6221, + "step": 9730 + }, + { + "epoch": 0.44, + "learning_rate": 1.1157111080847974e-05, + "loss": 0.6859, + "step": 9740 + }, + { + "epoch": 0.44, + "learning_rate": 1.114803213945254e-05, + "loss": 0.6084, + "step": 9750 + }, + { + "epoch": 0.44, + "learning_rate": 1.1138953198057107e-05, + "loss": 0.6539, + "step": 9760 + }, + { + "epoch": 0.44, + "learning_rate": 1.1129874256661675e-05, + "loss": 0.6131, + "step": 9770 + }, + { + "epoch": 0.44, + "learning_rate": 1.1120795315266242e-05, + "loss": 0.6937, + "step": 9780 + }, + { + "epoch": 0.44, + "learning_rate": 1.1111716373870806e-05, + "loss": 0.7943, + "step": 9790 + }, + { + "epoch": 0.44, + "learning_rate": 1.1102637432475374e-05, + "loss": 0.6664, + "step": 9800 + }, + { + "epoch": 0.45, + "learning_rate": 1.1093558491079942e-05, + "loss": 0.7092, + "step": 9810 + }, + { + "epoch": 0.45, + "learning_rate": 1.1084479549684507e-05, + "loss": 0.6586, + "step": 9820 + }, + { + "epoch": 0.45, + "learning_rate": 1.1075400608289075e-05, + "loss": 0.6746, + "step": 9830 + }, + { + "epoch": 0.45, + "learning_rate": 1.106632166689364e-05, + "loss": 0.6631, + "step": 9840 + }, + { + "epoch": 0.45, + "learning_rate": 1.1057242725498207e-05, + "loss": 0.6672, + "step": 9850 + }, + { + "epoch": 0.45, + "learning_rate": 1.1048163784102774e-05, + "loss": 0.6475, + "step": 9860 + }, + { + "epoch": 0.45, + "learning_rate": 1.1039084842707342e-05, + "loss": 0.7523, + "step": 9870 + }, + { + "epoch": 0.45, + "learning_rate": 1.103000590131191e-05, + "loss": 0.6203, + "step": 9880 + }, + { + "epoch": 0.45, + "learning_rate": 1.1020926959916473e-05, + "loss": 0.6355, + "step": 9890 + }, + { + "epoch": 0.45, + "learning_rate": 1.1011848018521041e-05, + "loss": 0.5984, + "step": 9900 + }, + { + "epoch": 0.45, + "learning_rate": 1.1002769077125609e-05, + "loss": 0.7189, + "step": 9910 + }, + { + "epoch": 0.45, + "learning_rate": 1.0993690135730174e-05, + "loss": 0.7266, + "step": 9920 + }, + { + "epoch": 0.45, + "learning_rate": 1.0984611194334742e-05, + "loss": 0.7332, + "step": 9930 + }, + { + "epoch": 0.45, + "learning_rate": 1.0975532252939308e-05, + "loss": 0.6303, + "step": 9940 + }, + { + "epoch": 0.45, + "learning_rate": 1.0966453311543874e-05, + "loss": 0.5779, + "step": 9950 + }, + { + "epoch": 0.45, + "learning_rate": 1.0957374370148441e-05, + "loss": 0.648, + "step": 9960 + }, + { + "epoch": 0.45, + "learning_rate": 1.0948295428753009e-05, + "loss": 0.6674, + "step": 9970 + }, + { + "epoch": 0.45, + "learning_rate": 1.0939216487357576e-05, + "loss": 0.7422, + "step": 9980 + }, + { + "epoch": 0.45, + "learning_rate": 1.093013754596214e-05, + "loss": 0.7232, + "step": 9990 + }, + { + "epoch": 0.45, + "learning_rate": 1.0921058604566708e-05, + "loss": 0.742, + "step": 10000 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.6108160309029455, + "eval_loss": 0.6693569421768188, + "eval_runtime": 72.9525, + "eval_samples_per_second": 56.777, + "eval_steps_per_second": 14.201, + "step": 10000 + }, + { + "epoch": 0.45, + "learning_rate": 1.0911979663171275e-05, + "loss": 0.5943, + "step": 10010 + }, + { + "epoch": 0.45, + "learning_rate": 1.0902900721775841e-05, + "loss": 0.6332, + "step": 10020 + }, + { + "epoch": 0.46, + "learning_rate": 1.0893821780380409e-05, + "loss": 0.7078, + "step": 10030 + }, + { + "epoch": 0.46, + "learning_rate": 1.0884742838984976e-05, + "loss": 0.6781, + "step": 10040 + }, + { + "epoch": 0.46, + "learning_rate": 1.087566389758954e-05, + "loss": 0.7174, + "step": 10050 + }, + { + "epoch": 0.46, + "learning_rate": 1.0866584956194108e-05, + "loss": 0.727, + "step": 10060 + }, + { + "epoch": 0.46, + "learning_rate": 1.0857506014798676e-05, + "loss": 0.6676, + "step": 10070 + }, + { + "epoch": 0.46, + "learning_rate": 1.0848427073403243e-05, + "loss": 0.6469, + "step": 10080 + }, + { + "epoch": 0.46, + "learning_rate": 1.0839348132007809e-05, + "loss": 0.7363, + "step": 10090 + }, + { + "epoch": 0.46, + "learning_rate": 1.0830269190612375e-05, + "loss": 0.768, + "step": 10100 + }, + { + "epoch": 0.46, + "learning_rate": 1.0821190249216942e-05, + "loss": 0.6092, + "step": 10110 + }, + { + "epoch": 0.46, + "learning_rate": 1.0812111307821508e-05, + "loss": 0.6672, + "step": 10120 + }, + { + "epoch": 0.46, + "learning_rate": 1.0803032366426076e-05, + "loss": 0.6822, + "step": 10130 + }, + { + "epoch": 0.46, + "learning_rate": 1.0793953425030643e-05, + "loss": 0.6617, + "step": 10140 + }, + { + "epoch": 0.46, + "learning_rate": 1.0784874483635208e-05, + "loss": 0.6945, + "step": 10150 + }, + { + "epoch": 0.46, + "learning_rate": 1.0775795542239775e-05, + "loss": 0.6867, + "step": 10160 + }, + { + "epoch": 0.46, + "learning_rate": 1.0766716600844343e-05, + "loss": 0.6332, + "step": 10170 + }, + { + "epoch": 0.46, + "learning_rate": 1.075763765944891e-05, + "loss": 0.6752, + "step": 10180 + }, + { + "epoch": 0.46, + "learning_rate": 1.0748558718053476e-05, + "loss": 0.6369, + "step": 10190 + }, + { + "epoch": 0.46, + "learning_rate": 1.0739479776658042e-05, + "loss": 0.6684, + "step": 10200 + }, + { + "epoch": 0.46, + "learning_rate": 1.073040083526261e-05, + "loss": 0.5461, + "step": 10210 + }, + { + "epoch": 0.46, + "learning_rate": 1.0721321893867175e-05, + "loss": 0.7184, + "step": 10220 + }, + { + "epoch": 0.46, + "learning_rate": 1.0712242952471743e-05, + "loss": 0.7496, + "step": 10230 + }, + { + "epoch": 0.46, + "learning_rate": 1.070316401107631e-05, + "loss": 0.6715, + "step": 10240 + }, + { + "epoch": 0.47, + "learning_rate": 1.0694085069680874e-05, + "loss": 0.6252, + "step": 10250 + }, + { + "epoch": 0.47, + "learning_rate": 1.0685006128285442e-05, + "loss": 0.6961, + "step": 10260 + }, + { + "epoch": 0.47, + "learning_rate": 1.067592718689001e-05, + "loss": 0.716, + "step": 10270 + }, + { + "epoch": 0.47, + "learning_rate": 1.0666848245494577e-05, + "loss": 0.7164, + "step": 10280 + }, + { + "epoch": 0.47, + "learning_rate": 1.0657769304099143e-05, + "loss": 0.6246, + "step": 10290 + }, + { + "epoch": 0.47, + "learning_rate": 1.0648690362703709e-05, + "loss": 0.6184, + "step": 10300 + }, + { + "epoch": 0.47, + "learning_rate": 1.0639611421308276e-05, + "loss": 0.6963, + "step": 10310 + }, + { + "epoch": 0.47, + "learning_rate": 1.0630532479912842e-05, + "loss": 0.6121, + "step": 10320 + }, + { + "epoch": 0.47, + "learning_rate": 1.062145353851741e-05, + "loss": 0.652, + "step": 10330 + }, + { + "epoch": 0.47, + "learning_rate": 1.0612374597121977e-05, + "loss": 0.6271, + "step": 10340 + }, + { + "epoch": 0.47, + "learning_rate": 1.0603295655726541e-05, + "loss": 0.624, + "step": 10350 + }, + { + "epoch": 0.47, + "learning_rate": 1.0594216714331109e-05, + "loss": 0.6789, + "step": 10360 + }, + { + "epoch": 0.47, + "learning_rate": 1.0585137772935677e-05, + "loss": 0.615, + "step": 10370 + }, + { + "epoch": 0.47, + "learning_rate": 1.0576058831540244e-05, + "loss": 0.6789, + "step": 10380 + }, + { + "epoch": 0.47, + "learning_rate": 1.056697989014481e-05, + "loss": 0.6871, + "step": 10390 + }, + { + "epoch": 0.47, + "learning_rate": 1.0557900948749377e-05, + "loss": 0.6904, + "step": 10400 + }, + { + "epoch": 0.47, + "learning_rate": 1.0548822007353943e-05, + "loss": 0.6643, + "step": 10410 + }, + { + "epoch": 0.47, + "learning_rate": 1.053974306595851e-05, + "loss": 0.6654, + "step": 10420 + }, + { + "epoch": 0.47, + "learning_rate": 1.0530664124563077e-05, + "loss": 0.7045, + "step": 10430 + }, + { + "epoch": 0.47, + "learning_rate": 1.0521585183167644e-05, + "loss": 0.6736, + "step": 10440 + }, + { + "epoch": 0.47, + "learning_rate": 1.0512506241772212e-05, + "loss": 0.7609, + "step": 10450 + }, + { + "epoch": 0.47, + "learning_rate": 1.0503427300376776e-05, + "loss": 0.6813, + "step": 10460 + }, + { + "epoch": 0.48, + "learning_rate": 1.0494348358981343e-05, + "loss": 0.6705, + "step": 10470 + }, + { + "epoch": 0.48, + "learning_rate": 1.0485269417585911e-05, + "loss": 0.6914, + "step": 10480 + }, + { + "epoch": 0.48, + "learning_rate": 1.0476190476190477e-05, + "loss": 0.7188, + "step": 10490 + }, + { + "epoch": 0.48, + "learning_rate": 1.0467111534795044e-05, + "loss": 0.6529, + "step": 10500 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.6166103331723805, + "eval_loss": 0.6667417883872986, + "eval_runtime": 72.4713, + "eval_samples_per_second": 57.154, + "eval_steps_per_second": 14.295, + "step": 10500 + }, + { + "epoch": 0.48, + "learning_rate": 1.045803259339961e-05, + "loss": 0.6578, + "step": 10510 + }, + { + "epoch": 0.48, + "learning_rate": 1.0448953652004176e-05, + "loss": 0.7453, + "step": 10520 + }, + { + "epoch": 0.48, + "learning_rate": 1.0439874710608744e-05, + "loss": 0.6691, + "step": 10530 + }, + { + "epoch": 0.48, + "learning_rate": 1.0430795769213311e-05, + "loss": 0.6914, + "step": 10540 + }, + { + "epoch": 0.48, + "learning_rate": 1.0421716827817879e-05, + "loss": 0.7102, + "step": 10550 + }, + { + "epoch": 0.48, + "learning_rate": 1.0412637886422443e-05, + "loss": 0.6609, + "step": 10560 + }, + { + "epoch": 0.48, + "learning_rate": 1.040355894502701e-05, + "loss": 0.6842, + "step": 10570 + }, + { + "epoch": 0.48, + "learning_rate": 1.0394480003631578e-05, + "loss": 0.6832, + "step": 10580 + }, + { + "epoch": 0.48, + "learning_rate": 1.0385401062236144e-05, + "loss": 0.6773, + "step": 10590 + }, + { + "epoch": 0.48, + "learning_rate": 1.0376322120840711e-05, + "loss": 0.6449, + "step": 10600 + }, + { + "epoch": 0.48, + "learning_rate": 1.0367243179445277e-05, + "loss": 0.6217, + "step": 10610 + }, + { + "epoch": 0.48, + "learning_rate": 1.0358164238049843e-05, + "loss": 0.6234, + "step": 10620 + }, + { + "epoch": 0.48, + "learning_rate": 1.034908529665441e-05, + "loss": 0.5896, + "step": 10630 + }, + { + "epoch": 0.48, + "learning_rate": 1.0340006355258978e-05, + "loss": 0.6473, + "step": 10640 + }, + { + "epoch": 0.48, + "learning_rate": 1.0330927413863546e-05, + "loss": 0.7066, + "step": 10650 + }, + { + "epoch": 0.48, + "learning_rate": 1.032184847246811e-05, + "loss": 0.5883, + "step": 10660 + }, + { + "epoch": 0.48, + "learning_rate": 1.0312769531072677e-05, + "loss": 0.6541, + "step": 10670 + }, + { + "epoch": 0.48, + "learning_rate": 1.0303690589677245e-05, + "loss": 0.6373, + "step": 10680 + }, + { + "epoch": 0.49, + "learning_rate": 1.029461164828181e-05, + "loss": 0.7643, + "step": 10690 + }, + { + "epoch": 0.49, + "learning_rate": 1.0285532706886378e-05, + "loss": 0.6492, + "step": 10700 + }, + { + "epoch": 0.49, + "learning_rate": 1.0276453765490944e-05, + "loss": 0.7371, + "step": 10710 + }, + { + "epoch": 0.49, + "learning_rate": 1.026737482409551e-05, + "loss": 0.698, + "step": 10720 + }, + { + "epoch": 0.49, + "learning_rate": 1.0258295882700078e-05, + "loss": 0.7586, + "step": 10730 + }, + { + "epoch": 0.49, + "learning_rate": 1.0249216941304645e-05, + "loss": 0.6699, + "step": 10740 + }, + { + "epoch": 0.49, + "learning_rate": 1.0240137999909213e-05, + "loss": 0.708, + "step": 10750 + }, + { + "epoch": 0.49, + "learning_rate": 1.0231059058513779e-05, + "loss": 0.6816, + "step": 10760 + }, + { + "epoch": 0.49, + "learning_rate": 1.0221980117118344e-05, + "loss": 0.6832, + "step": 10770 + }, + { + "epoch": 0.49, + "learning_rate": 1.0212901175722912e-05, + "loss": 0.7, + "step": 10780 + }, + { + "epoch": 0.49, + "learning_rate": 1.0203822234327478e-05, + "loss": 0.6459, + "step": 10790 + }, + { + "epoch": 0.49, + "learning_rate": 1.0194743292932045e-05, + "loss": 0.6645, + "step": 10800 + }, + { + "epoch": 0.49, + "learning_rate": 1.0185664351536613e-05, + "loss": 0.6486, + "step": 10810 + }, + { + "epoch": 0.49, + "learning_rate": 1.0176585410141177e-05, + "loss": 0.6078, + "step": 10820 + }, + { + "epoch": 0.49, + "learning_rate": 1.0167506468745745e-05, + "loss": 0.6418, + "step": 10830 + }, + { + "epoch": 0.49, + "learning_rate": 1.0158427527350312e-05, + "loss": 0.6291, + "step": 10840 + }, + { + "epoch": 0.49, + "learning_rate": 1.014934858595488e-05, + "loss": 0.6855, + "step": 10850 + }, + { + "epoch": 0.49, + "learning_rate": 1.0140269644559445e-05, + "loss": 0.6988, + "step": 10860 + }, + { + "epoch": 0.49, + "learning_rate": 1.0131190703164011e-05, + "loss": 0.6133, + "step": 10870 + }, + { + "epoch": 0.49, + "learning_rate": 1.0122111761768579e-05, + "loss": 0.607, + "step": 10880 + }, + { + "epoch": 0.49, + "learning_rate": 1.0113032820373145e-05, + "loss": 0.7082, + "step": 10890 + }, + { + "epoch": 0.49, + "learning_rate": 1.0103953878977712e-05, + "loss": 0.6539, + "step": 10900 + }, + { + "epoch": 0.5, + "learning_rate": 1.009487493758228e-05, + "loss": 0.6391, + "step": 10910 + }, + { + "epoch": 0.5, + "learning_rate": 1.0085795996186844e-05, + "loss": 0.7664, + "step": 10920 + }, + { + "epoch": 0.5, + "learning_rate": 1.0076717054791412e-05, + "loss": 0.7229, + "step": 10930 + }, + { + "epoch": 0.5, + "learning_rate": 1.0067638113395979e-05, + "loss": 0.6549, + "step": 10940 + }, + { + "epoch": 0.5, + "learning_rate": 1.0058559172000547e-05, + "loss": 0.7496, + "step": 10950 + }, + { + "epoch": 0.5, + "learning_rate": 1.0049480230605112e-05, + "loss": 0.743, + "step": 10960 + }, + { + "epoch": 0.5, + "learning_rate": 1.0040401289209678e-05, + "loss": 0.7393, + "step": 10970 + }, + { + "epoch": 0.5, + "learning_rate": 1.0031322347814246e-05, + "loss": 0.7133, + "step": 10980 + }, + { + "epoch": 0.5, + "learning_rate": 1.0022243406418812e-05, + "loss": 0.6711, + "step": 10990 + }, + { + "epoch": 0.5, + "learning_rate": 1.001316446502338e-05, + "loss": 0.6803, + "step": 11000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.6214389183969097, + "eval_loss": 0.6656657457351685, + "eval_runtime": 72.6193, + "eval_samples_per_second": 57.037, + "eval_steps_per_second": 14.266, + "step": 11000 + }, + { + "epoch": 0.5, + "learning_rate": 1.0004085523627947e-05, + "loss": 0.6867, + "step": 11010 + }, + { + "epoch": 0.5, + "learning_rate": 9.995006582232513e-06, + "loss": 0.6211, + "step": 11020 + }, + { + "epoch": 0.5, + "learning_rate": 9.985927640837078e-06, + "loss": 0.7129, + "step": 11030 + }, + { + "epoch": 0.5, + "learning_rate": 9.976848699441646e-06, + "loss": 0.7109, + "step": 11040 + }, + { + "epoch": 0.5, + "learning_rate": 9.967769758046214e-06, + "loss": 0.6867, + "step": 11050 + }, + { + "epoch": 0.5, + "learning_rate": 9.95869081665078e-06, + "loss": 0.6592, + "step": 11060 + }, + { + "epoch": 0.5, + "learning_rate": 9.949611875255345e-06, + "loss": 0.6264, + "step": 11070 + }, + { + "epoch": 0.5, + "learning_rate": 9.940532933859913e-06, + "loss": 0.6008, + "step": 11080 + }, + { + "epoch": 0.5, + "learning_rate": 9.931453992464479e-06, + "loss": 0.684, + "step": 11090 + }, + { + "epoch": 0.5, + "learning_rate": 9.922375051069046e-06, + "loss": 0.6029, + "step": 11100 + }, + { + "epoch": 0.5, + "learning_rate": 9.913296109673612e-06, + "loss": 0.6953, + "step": 11110 + }, + { + "epoch": 0.5, + "learning_rate": 9.90421716827818e-06, + "loss": 0.6785, + "step": 11120 + }, + { + "epoch": 0.51, + "learning_rate": 9.895138226882747e-06, + "loss": 0.5875, + "step": 11130 + }, + { + "epoch": 0.51, + "learning_rate": 9.886059285487313e-06, + "loss": 0.6793, + "step": 11140 + }, + { + "epoch": 0.51, + "learning_rate": 9.87698034409188e-06, + "loss": 0.6586, + "step": 11150 + }, + { + "epoch": 0.51, + "learning_rate": 9.867901402696446e-06, + "loss": 0.607, + "step": 11160 + }, + { + "epoch": 0.51, + "learning_rate": 9.858822461301012e-06, + "loss": 0.5693, + "step": 11170 + }, + { + "epoch": 0.51, + "learning_rate": 9.84974351990558e-06, + "loss": 0.6781, + "step": 11180 + }, + { + "epoch": 0.51, + "learning_rate": 9.840664578510146e-06, + "loss": 0.6658, + "step": 11190 + }, + { + "epoch": 0.51, + "learning_rate": 9.831585637114713e-06, + "loss": 0.7047, + "step": 11200 + }, + { + "epoch": 0.51, + "learning_rate": 9.822506695719279e-06, + "loss": 0.7633, + "step": 11210 + }, + { + "epoch": 0.51, + "learning_rate": 9.813427754323847e-06, + "loss": 0.6619, + "step": 11220 + }, + { + "epoch": 0.51, + "learning_rate": 9.804348812928414e-06, + "loss": 0.6832, + "step": 11230 + }, + { + "epoch": 0.51, + "learning_rate": 9.79526987153298e-06, + "loss": 0.6477, + "step": 11240 + }, + { + "epoch": 0.51, + "learning_rate": 9.786190930137547e-06, + "loss": 0.6107, + "step": 11250 + }, + { + "epoch": 0.51, + "learning_rate": 9.777111988742113e-06, + "loss": 0.658, + "step": 11260 + }, + { + "epoch": 0.51, + "learning_rate": 9.76803304734668e-06, + "loss": 0.6137, + "step": 11270 + }, + { + "epoch": 0.51, + "learning_rate": 9.758954105951247e-06, + "loss": 0.5883, + "step": 11280 + }, + { + "epoch": 0.51, + "learning_rate": 9.749875164555813e-06, + "loss": 0.683, + "step": 11290 + }, + { + "epoch": 0.51, + "learning_rate": 9.74079622316038e-06, + "loss": 0.6953, + "step": 11300 + }, + { + "epoch": 0.51, + "learning_rate": 9.731717281764948e-06, + "loss": 0.5729, + "step": 11310 + }, + { + "epoch": 0.51, + "learning_rate": 9.722638340369514e-06, + "loss": 0.7637, + "step": 11320 + }, + { + "epoch": 0.51, + "learning_rate": 9.713559398974081e-06, + "loss": 0.7246, + "step": 11330 + }, + { + "epoch": 0.51, + "learning_rate": 9.704480457578647e-06, + "loss": 0.6527, + "step": 11340 + }, + { + "epoch": 0.52, + "learning_rate": 9.695401516183214e-06, + "loss": 0.7195, + "step": 11350 + }, + { + "epoch": 0.52, + "learning_rate": 9.68632257478778e-06, + "loss": 0.6072, + "step": 11360 + }, + { + "epoch": 0.52, + "learning_rate": 9.677243633392346e-06, + "loss": 0.6926, + "step": 11370 + }, + { + "epoch": 0.52, + "learning_rate": 9.668164691996914e-06, + "loss": 0.649, + "step": 11380 + }, + { + "epoch": 0.52, + "learning_rate": 9.65908575060148e-06, + "loss": 0.5658, + "step": 11390 + }, + { + "epoch": 0.52, + "learning_rate": 9.650006809206047e-06, + "loss": 0.7312, + "step": 11400 + }, + { + "epoch": 0.52, + "learning_rate": 9.640927867810615e-06, + "loss": 0.6996, + "step": 11410 + }, + { + "epoch": 0.52, + "learning_rate": 9.63184892641518e-06, + "loss": 0.6453, + "step": 11420 + }, + { + "epoch": 0.52, + "learning_rate": 9.622769985019748e-06, + "loss": 0.6148, + "step": 11430 + }, + { + "epoch": 0.52, + "learning_rate": 9.613691043624314e-06, + "loss": 0.734, + "step": 11440 + }, + { + "epoch": 0.52, + "learning_rate": 9.604612102228881e-06, + "loss": 0.633, + "step": 11450 + }, + { + "epoch": 0.52, + "learning_rate": 9.595533160833447e-06, + "loss": 0.6434, + "step": 11460 + }, + { + "epoch": 0.52, + "learning_rate": 9.586454219438013e-06, + "loss": 0.6508, + "step": 11470 + }, + { + "epoch": 0.52, + "learning_rate": 9.57737527804258e-06, + "loss": 0.7098, + "step": 11480 + }, + { + "epoch": 0.52, + "learning_rate": 9.568296336647148e-06, + "loss": 0.6662, + "step": 11490 + }, + { + "epoch": 0.52, + "learning_rate": 9.559217395251714e-06, + "loss": 0.6588, + "step": 11500 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.6219217769193627, + "eval_loss": 0.6640219688415527, + "eval_runtime": 74.4423, + "eval_samples_per_second": 55.64, + "eval_steps_per_second": 13.917, + "step": 11500 + }, + { + "epoch": 0.52, + "learning_rate": 9.550138453856282e-06, + "loss": 0.7092, + "step": 11510 + }, + { + "epoch": 0.52, + "learning_rate": 9.541059512460847e-06, + "loss": 0.6582, + "step": 11520 + }, + { + "epoch": 0.52, + "learning_rate": 9.531980571065415e-06, + "loss": 0.6588, + "step": 11530 + }, + { + "epoch": 0.52, + "learning_rate": 9.52290162966998e-06, + "loss": 0.7258, + "step": 11540 + }, + { + "epoch": 0.52, + "learning_rate": 9.513822688274548e-06, + "loss": 0.7121, + "step": 11550 + }, + { + "epoch": 0.52, + "learning_rate": 9.504743746879114e-06, + "loss": 0.6375, + "step": 11560 + }, + { + "epoch": 0.53, + "learning_rate": 9.49566480548368e-06, + "loss": 0.6449, + "step": 11570 + }, + { + "epoch": 0.53, + "learning_rate": 9.486585864088248e-06, + "loss": 0.7689, + "step": 11580 + }, + { + "epoch": 0.53, + "learning_rate": 9.477506922692815e-06, + "loss": 0.6449, + "step": 11590 + }, + { + "epoch": 0.53, + "learning_rate": 9.468427981297381e-06, + "loss": 0.7828, + "step": 11600 + }, + { + "epoch": 0.53, + "learning_rate": 9.459349039901949e-06, + "loss": 0.5143, + "step": 11610 + }, + { + "epoch": 0.53, + "learning_rate": 9.450270098506516e-06, + "loss": 0.6818, + "step": 11620 + }, + { + "epoch": 0.53, + "learning_rate": 9.441191157111082e-06, + "loss": 0.6859, + "step": 11630 + }, + { + "epoch": 0.53, + "learning_rate": 9.432112215715648e-06, + "loss": 0.5936, + "step": 11640 + }, + { + "epoch": 0.53, + "learning_rate": 9.423033274320215e-06, + "loss": 0.6316, + "step": 11650 + }, + { + "epoch": 0.53, + "learning_rate": 9.413954332924781e-06, + "loss": 0.6494, + "step": 11660 + }, + { + "epoch": 0.53, + "learning_rate": 9.404875391529349e-06, + "loss": 0.6939, + "step": 11670 + }, + { + "epoch": 0.53, + "learning_rate": 9.395796450133915e-06, + "loss": 0.6666, + "step": 11680 + }, + { + "epoch": 0.53, + "learning_rate": 9.386717508738482e-06, + "loss": 0.6594, + "step": 11690 + }, + { + "epoch": 0.53, + "learning_rate": 9.377638567343048e-06, + "loss": 0.6082, + "step": 11700 + }, + { + "epoch": 0.53, + "learning_rate": 9.368559625947616e-06, + "loss": 0.6211, + "step": 11710 + }, + { + "epoch": 0.53, + "learning_rate": 9.359480684552183e-06, + "loss": 0.7973, + "step": 11720 + }, + { + "epoch": 0.53, + "learning_rate": 9.350401743156749e-06, + "loss": 0.7477, + "step": 11730 + }, + { + "epoch": 0.53, + "learning_rate": 9.341322801761315e-06, + "loss": 0.5236, + "step": 11740 + }, + { + "epoch": 0.53, + "learning_rate": 9.332243860365882e-06, + "loss": 0.6607, + "step": 11750 + }, + { + "epoch": 0.53, + "learning_rate": 9.323164918970448e-06, + "loss": 0.6092, + "step": 11760 + }, + { + "epoch": 0.53, + "learning_rate": 9.314085977575016e-06, + "loss": 0.6771, + "step": 11770 + }, + { + "epoch": 0.53, + "learning_rate": 9.305007036179582e-06, + "loss": 0.6631, + "step": 11780 + }, + { + "epoch": 0.54, + "learning_rate": 9.295928094784149e-06, + "loss": 0.6256, + "step": 11790 + }, + { + "epoch": 0.54, + "learning_rate": 9.286849153388717e-06, + "loss": 0.7223, + "step": 11800 + }, + { + "epoch": 0.54, + "learning_rate": 9.277770211993282e-06, + "loss": 0.759, + "step": 11810 + }, + { + "epoch": 0.54, + "learning_rate": 9.26869127059785e-06, + "loss": 0.6131, + "step": 11820 + }, + { + "epoch": 0.54, + "learning_rate": 9.259612329202416e-06, + "loss": 0.6332, + "step": 11830 + }, + { + "epoch": 0.54, + "learning_rate": 9.250533387806982e-06, + "loss": 0.6002, + "step": 11840 + }, + { + "epoch": 0.54, + "learning_rate": 9.24145444641155e-06, + "loss": 0.5533, + "step": 11850 + }, + { + "epoch": 0.54, + "learning_rate": 9.232375505016115e-06, + "loss": 0.6191, + "step": 11860 + }, + { + "epoch": 0.54, + "learning_rate": 9.223296563620683e-06, + "loss": 0.7561, + "step": 11870 + }, + { + "epoch": 0.54, + "learning_rate": 9.214217622225248e-06, + "loss": 0.6852, + "step": 11880 + }, + { + "epoch": 0.54, + "learning_rate": 9.205138680829816e-06, + "loss": 0.7559, + "step": 11890 + }, + { + "epoch": 0.54, + "learning_rate": 9.196059739434384e-06, + "loss": 0.6805, + "step": 11900 + }, + { + "epoch": 0.54, + "learning_rate": 9.18698079803895e-06, + "loss": 0.6937, + "step": 11910 + }, + { + "epoch": 0.54, + "learning_rate": 9.177901856643517e-06, + "loss": 0.766, + "step": 11920 + }, + { + "epoch": 0.54, + "learning_rate": 9.168822915248083e-06, + "loss": 0.6945, + "step": 11930 + }, + { + "epoch": 0.54, + "learning_rate": 9.159743973852649e-06, + "loss": 0.5963, + "step": 11940 + }, + { + "epoch": 0.54, + "learning_rate": 9.150665032457216e-06, + "loss": 0.5949, + "step": 11950 + }, + { + "epoch": 0.54, + "learning_rate": 9.141586091061782e-06, + "loss": 0.7, + "step": 11960 + }, + { + "epoch": 0.54, + "learning_rate": 9.13250714966635e-06, + "loss": 0.6602, + "step": 11970 + }, + { + "epoch": 0.54, + "learning_rate": 9.123428208270917e-06, + "loss": 0.6168, + "step": 11980 + }, + { + "epoch": 0.54, + "learning_rate": 9.114349266875483e-06, + "loss": 0.65, + "step": 11990 + }, + { + "epoch": 0.54, + "learning_rate": 9.10527032548005e-06, + "loss": 0.5652, + "step": 12000 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.6262675036214389, + "eval_loss": 0.6643086671829224, + "eval_runtime": 74.1561, + "eval_samples_per_second": 55.855, + "eval_steps_per_second": 13.971, + "step": 12000 + }, + { + "epoch": 0.55, + "learning_rate": 9.096191384084616e-06, + "loss": 0.5828, + "step": 12010 + }, + { + "epoch": 0.55, + "learning_rate": 9.087112442689184e-06, + "loss": 0.6129, + "step": 12020 + }, + { + "epoch": 0.55, + "learning_rate": 9.07803350129375e-06, + "loss": 0.6373, + "step": 12030 + }, + { + "epoch": 0.55, + "learning_rate": 9.068954559898316e-06, + "loss": 0.5529, + "step": 12040 + }, + { + "epoch": 0.55, + "learning_rate": 9.059875618502883e-06, + "loss": 0.5791, + "step": 12050 + }, + { + "epoch": 0.55, + "learning_rate": 9.050796677107449e-06, + "loss": 0.7039, + "step": 12060 + }, + { + "epoch": 0.55, + "learning_rate": 9.041717735712017e-06, + "loss": 0.6314, + "step": 12070 + }, + { + "epoch": 0.55, + "learning_rate": 9.032638794316584e-06, + "loss": 0.8344, + "step": 12080 + }, + { + "epoch": 0.55, + "learning_rate": 9.02355985292115e-06, + "loss": 0.6568, + "step": 12090 + }, + { + "epoch": 0.55, + "learning_rate": 9.014480911525718e-06, + "loss": 0.6854, + "step": 12100 + }, + { + "epoch": 0.55, + "learning_rate": 9.005401970130283e-06, + "loss": 0.7133, + "step": 12110 + }, + { + "epoch": 0.55, + "learning_rate": 8.996323028734851e-06, + "loss": 0.59, + "step": 12120 + }, + { + "epoch": 0.55, + "learning_rate": 8.987244087339417e-06, + "loss": 0.7049, + "step": 12130 + }, + { + "epoch": 0.55, + "learning_rate": 8.978165145943983e-06, + "loss": 0.5949, + "step": 12140 + }, + { + "epoch": 0.55, + "learning_rate": 8.96908620454855e-06, + "loss": 0.6137, + "step": 12150 + }, + { + "epoch": 0.55, + "learning_rate": 8.960007263153116e-06, + "loss": 0.8039, + "step": 12160 + }, + { + "epoch": 0.55, + "learning_rate": 8.950928321757684e-06, + "loss": 0.5955, + "step": 12170 + }, + { + "epoch": 0.55, + "learning_rate": 8.941849380362251e-06, + "loss": 0.558, + "step": 12180 + }, + { + "epoch": 0.55, + "learning_rate": 8.932770438966817e-06, + "loss": 0.6453, + "step": 12190 + }, + { + "epoch": 0.55, + "learning_rate": 8.923691497571384e-06, + "loss": 0.5607, + "step": 12200 + }, + { + "epoch": 0.55, + "learning_rate": 8.91461255617595e-06, + "loss": 0.6797, + "step": 12210 + }, + { + "epoch": 0.55, + "learning_rate": 8.905533614780518e-06, + "loss": 0.6105, + "step": 12220 + }, + { + "epoch": 0.56, + "learning_rate": 8.896454673385084e-06, + "loss": 0.607, + "step": 12230 + }, + { + "epoch": 0.56, + "learning_rate": 8.88737573198965e-06, + "loss": 0.7182, + "step": 12240 + }, + { + "epoch": 0.56, + "learning_rate": 8.878296790594217e-06, + "loss": 0.6537, + "step": 12250 + }, + { + "epoch": 0.56, + "learning_rate": 8.869217849198785e-06, + "loss": 0.8063, + "step": 12260 + }, + { + "epoch": 0.56, + "learning_rate": 8.86013890780335e-06, + "loss": 0.5801, + "step": 12270 + }, + { + "epoch": 0.56, + "learning_rate": 8.851059966407918e-06, + "loss": 0.707, + "step": 12280 + }, + { + "epoch": 0.56, + "learning_rate": 8.841981025012484e-06, + "loss": 0.7332, + "step": 12290 + }, + { + "epoch": 0.56, + "learning_rate": 8.832902083617051e-06, + "loss": 0.7797, + "step": 12300 + }, + { + "epoch": 0.56, + "learning_rate": 8.823823142221617e-06, + "loss": 0.7146, + "step": 12310 + }, + { + "epoch": 0.56, + "learning_rate": 8.814744200826185e-06, + "loss": 0.667, + "step": 12320 + }, + { + "epoch": 0.56, + "learning_rate": 8.80566525943075e-06, + "loss": 0.7193, + "step": 12330 + }, + { + "epoch": 0.56, + "learning_rate": 8.796586318035317e-06, + "loss": 0.7027, + "step": 12340 + }, + { + "epoch": 0.56, + "learning_rate": 8.787507376639884e-06, + "loss": 0.6092, + "step": 12350 + }, + { + "epoch": 0.56, + "learning_rate": 8.778428435244452e-06, + "loss": 0.6668, + "step": 12360 + }, + { + "epoch": 0.56, + "learning_rate": 8.769349493849017e-06, + "loss": 0.7051, + "step": 12370 + }, + { + "epoch": 0.56, + "learning_rate": 8.760270552453585e-06, + "loss": 0.7043, + "step": 12380 + }, + { + "epoch": 0.56, + "learning_rate": 8.751191611058153e-06, + "loss": 0.7746, + "step": 12390 + }, + { + "epoch": 0.56, + "learning_rate": 8.742112669662718e-06, + "loss": 0.5826, + "step": 12400 + }, + { + "epoch": 0.56, + "learning_rate": 8.733033728267284e-06, + "loss": 0.6295, + "step": 12410 + }, + { + "epoch": 0.56, + "learning_rate": 8.723954786871852e-06, + "loss": 0.6553, + "step": 12420 + }, + { + "epoch": 0.56, + "learning_rate": 8.714875845476418e-06, + "loss": 0.6766, + "step": 12430 + }, + { + "epoch": 0.56, + "learning_rate": 8.705796904080985e-06, + "loss": 0.5719, + "step": 12440 + }, + { + "epoch": 0.57, + "learning_rate": 8.696717962685551e-06, + "loss": 0.6652, + "step": 12450 + }, + { + "epoch": 0.57, + "learning_rate": 8.687639021290119e-06, + "loss": 0.7088, + "step": 12460 + }, + { + "epoch": 0.57, + "learning_rate": 8.678560079894684e-06, + "loss": 0.6367, + "step": 12470 + }, + { + "epoch": 0.57, + "learning_rate": 8.669481138499252e-06, + "loss": 0.7176, + "step": 12480 + }, + { + "epoch": 0.57, + "learning_rate": 8.66040219710382e-06, + "loss": 0.8223, + "step": 12490 + }, + { + "epoch": 0.57, + "learning_rate": 8.651323255708385e-06, + "loss": 0.7406, + "step": 12500 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.6284403669724771, + "eval_loss": 0.6642463803291321, + "eval_runtime": 74.8866, + "eval_samples_per_second": 55.31, + "eval_steps_per_second": 13.834, + "step": 12500 + }, + { + "epoch": 0.57, + "learning_rate": 8.642244314312951e-06, + "loss": 0.7641, + "step": 12510 + }, + { + "epoch": 0.57, + "learning_rate": 8.633165372917519e-06, + "loss": 0.6031, + "step": 12520 + }, + { + "epoch": 0.57, + "learning_rate": 8.624086431522085e-06, + "loss": 0.6365, + "step": 12530 + }, + { + "epoch": 0.57, + "learning_rate": 8.615007490126652e-06, + "loss": 0.5977, + "step": 12540 + }, + { + "epoch": 0.57, + "learning_rate": 8.605928548731218e-06, + "loss": 0.6783, + "step": 12550 + }, + { + "epoch": 0.57, + "learning_rate": 8.596849607335786e-06, + "loss": 0.7094, + "step": 12560 + }, + { + "epoch": 0.57, + "learning_rate": 8.587770665940353e-06, + "loss": 0.6758, + "step": 12570 + }, + { + "epoch": 0.57, + "learning_rate": 8.578691724544919e-06, + "loss": 0.7918, + "step": 12580 + }, + { + "epoch": 0.57, + "learning_rate": 8.569612783149486e-06, + "loss": 0.6793, + "step": 12590 + }, + { + "epoch": 0.57, + "learning_rate": 8.560533841754052e-06, + "loss": 0.7588, + "step": 12600 + }, + { + "epoch": 0.57, + "learning_rate": 8.551454900358618e-06, + "loss": 0.6875, + "step": 12610 + }, + { + "epoch": 0.57, + "learning_rate": 8.542375958963186e-06, + "loss": 0.5982, + "step": 12620 + }, + { + "epoch": 0.57, + "learning_rate": 8.533297017567752e-06, + "loss": 0.7289, + "step": 12630 + }, + { + "epoch": 0.57, + "learning_rate": 8.524218076172319e-06, + "loss": 0.641, + "step": 12640 + }, + { + "epoch": 0.57, + "learning_rate": 8.515139134776885e-06, + "loss": 0.6434, + "step": 12650 + }, + { + "epoch": 0.57, + "learning_rate": 8.506060193381452e-06, + "loss": 0.6051, + "step": 12660 + }, + { + "epoch": 0.58, + "learning_rate": 8.49698125198602e-06, + "loss": 0.7082, + "step": 12670 + }, + { + "epoch": 0.58, + "learning_rate": 8.487902310590586e-06, + "loss": 0.5811, + "step": 12680 + }, + { + "epoch": 0.58, + "learning_rate": 8.478823369195153e-06, + "loss": 0.573, + "step": 12690 + }, + { + "epoch": 0.58, + "learning_rate": 8.46974442779972e-06, + "loss": 0.6701, + "step": 12700 + }, + { + "epoch": 0.58, + "learning_rate": 8.460665486404285e-06, + "loss": 0.6824, + "step": 12710 + }, + { + "epoch": 0.58, + "learning_rate": 8.451586545008853e-06, + "loss": 0.5939, + "step": 12720 + }, + { + "epoch": 0.58, + "learning_rate": 8.442507603613419e-06, + "loss": 0.5402, + "step": 12730 + }, + { + "epoch": 0.58, + "learning_rate": 8.433428662217986e-06, + "loss": 0.7176, + "step": 12740 + }, + { + "epoch": 0.58, + "learning_rate": 8.424349720822554e-06, + "loss": 0.7102, + "step": 12750 + }, + { + "epoch": 0.58, + "learning_rate": 8.41527077942712e-06, + "loss": 0.7031, + "step": 12760 + }, + { + "epoch": 0.58, + "learning_rate": 8.406191838031687e-06, + "loss": 0.6232, + "step": 12770 + }, + { + "epoch": 0.58, + "learning_rate": 8.397112896636253e-06, + "loss": 0.7289, + "step": 12780 + }, + { + "epoch": 0.58, + "learning_rate": 8.38803395524082e-06, + "loss": 0.6281, + "step": 12790 + }, + { + "epoch": 0.58, + "learning_rate": 8.378955013845386e-06, + "loss": 0.6742, + "step": 12800 + }, + { + "epoch": 0.58, + "learning_rate": 8.369876072449952e-06, + "loss": 0.5912, + "step": 12810 + }, + { + "epoch": 0.58, + "learning_rate": 8.36079713105452e-06, + "loss": 0.6687, + "step": 12820 + }, + { + "epoch": 0.58, + "learning_rate": 8.351718189659085e-06, + "loss": 0.6396, + "step": 12830 + }, + { + "epoch": 0.58, + "learning_rate": 8.342639248263653e-06, + "loss": 0.6611, + "step": 12840 + }, + { + "epoch": 0.58, + "learning_rate": 8.33356030686822e-06, + "loss": 0.7992, + "step": 12850 + }, + { + "epoch": 0.58, + "learning_rate": 8.324481365472786e-06, + "loss": 0.5539, + "step": 12860 + }, + { + "epoch": 0.58, + "learning_rate": 8.315402424077354e-06, + "loss": 0.6166, + "step": 12870 + }, + { + "epoch": 0.58, + "learning_rate": 8.30632348268192e-06, + "loss": 0.6248, + "step": 12880 + }, + { + "epoch": 0.59, + "learning_rate": 8.297244541286487e-06, + "loss": 0.6965, + "step": 12890 + }, + { + "epoch": 0.59, + "learning_rate": 8.288165599891053e-06, + "loss": 0.6883, + "step": 12900 + }, + { + "epoch": 0.59, + "learning_rate": 8.279086658495619e-06, + "loss": 0.6459, + "step": 12910 + }, + { + "epoch": 0.59, + "learning_rate": 8.270007717100187e-06, + "loss": 0.6266, + "step": 12920 + }, + { + "epoch": 0.59, + "learning_rate": 8.260928775704754e-06, + "loss": 0.6408, + "step": 12930 + }, + { + "epoch": 0.59, + "learning_rate": 8.25184983430932e-06, + "loss": 0.6605, + "step": 12940 + }, + { + "epoch": 0.59, + "learning_rate": 8.242770892913888e-06, + "loss": 0.6299, + "step": 12950 + }, + { + "epoch": 0.59, + "learning_rate": 8.233691951518453e-06, + "loss": 0.7291, + "step": 12960 + }, + { + "epoch": 0.59, + "learning_rate": 8.224613010123021e-06, + "loss": 0.5949, + "step": 12970 + }, + { + "epoch": 0.59, + "learning_rate": 8.215534068727587e-06, + "loss": 0.6488, + "step": 12980 + }, + { + "epoch": 0.59, + "learning_rate": 8.206455127332154e-06, + "loss": 0.7426, + "step": 12990 + }, + { + "epoch": 0.59, + "learning_rate": 8.19737618593672e-06, + "loss": 0.6742, + "step": 13000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.6238532110091743, + "eval_loss": 0.6639795303344727, + "eval_runtime": 73.0379, + "eval_samples_per_second": 56.71, + "eval_steps_per_second": 14.184, + "step": 13000 + }, + { + "epoch": 0.59, + "learning_rate": 8.188297244541286e-06, + "loss": 0.6525, + "step": 13010 + }, + { + "epoch": 0.59, + "learning_rate": 8.179218303145854e-06, + "loss": 0.7201, + "step": 13020 + }, + { + "epoch": 0.59, + "learning_rate": 8.170139361750421e-06, + "loss": 0.5959, + "step": 13030 + }, + { + "epoch": 0.59, + "learning_rate": 8.161060420354987e-06, + "loss": 0.6248, + "step": 13040 + }, + { + "epoch": 0.59, + "learning_rate": 8.151981478959554e-06, + "loss": 0.7281, + "step": 13050 + }, + { + "epoch": 0.59, + "learning_rate": 8.14290253756412e-06, + "loss": 0.7008, + "step": 13060 + }, + { + "epoch": 0.59, + "learning_rate": 8.133823596168688e-06, + "loss": 0.6934, + "step": 13070 + }, + { + "epoch": 0.59, + "learning_rate": 8.124744654773254e-06, + "loss": 0.7391, + "step": 13080 + }, + { + "epoch": 0.59, + "learning_rate": 8.115665713377821e-06, + "loss": 0.6283, + "step": 13090 + }, + { + "epoch": 0.59, + "learning_rate": 8.106586771982387e-06, + "loss": 0.634, + "step": 13100 + }, + { + "epoch": 0.6, + "learning_rate": 8.097507830586955e-06, + "loss": 0.6631, + "step": 13110 + }, + { + "epoch": 0.6, + "learning_rate": 8.08842888919152e-06, + "loss": 0.6844, + "step": 13120 + }, + { + "epoch": 0.6, + "learning_rate": 8.079349947796088e-06, + "loss": 0.5783, + "step": 13130 + }, + { + "epoch": 0.6, + "learning_rate": 8.070271006400654e-06, + "loss": 0.657, + "step": 13140 + }, + { + "epoch": 0.6, + "learning_rate": 8.061192065005221e-06, + "loss": 0.7016, + "step": 13150 + }, + { + "epoch": 0.6, + "learning_rate": 8.052113123609789e-06, + "loss": 0.684, + "step": 13160 + }, + { + "epoch": 0.6, + "learning_rate": 8.043034182214355e-06, + "loss": 0.6338, + "step": 13170 + }, + { + "epoch": 0.6, + "learning_rate": 8.03395524081892e-06, + "loss": 0.6326, + "step": 13180 + }, + { + "epoch": 0.6, + "learning_rate": 8.024876299423488e-06, + "loss": 0.6875, + "step": 13190 + }, + { + "epoch": 0.6, + "learning_rate": 8.015797358028054e-06, + "loss": 0.6289, + "step": 13200 + }, + { + "epoch": 0.6, + "learning_rate": 8.006718416632622e-06, + "loss": 0.7225, + "step": 13210 + }, + { + "epoch": 0.6, + "learning_rate": 7.997639475237187e-06, + "loss": 0.7109, + "step": 13220 + }, + { + "epoch": 0.6, + "learning_rate": 7.988560533841755e-06, + "loss": 0.6988, + "step": 13230 + }, + { + "epoch": 0.6, + "learning_rate": 7.979481592446321e-06, + "loss": 0.6207, + "step": 13240 + }, + { + "epoch": 0.6, + "learning_rate": 7.970402651050888e-06, + "loss": 0.6271, + "step": 13250 + }, + { + "epoch": 0.6, + "learning_rate": 7.961323709655456e-06, + "loss": 0.6178, + "step": 13260 + }, + { + "epoch": 0.6, + "learning_rate": 7.952244768260022e-06, + "loss": 0.6682, + "step": 13270 + }, + { + "epoch": 0.6, + "learning_rate": 7.943165826864588e-06, + "loss": 0.6662, + "step": 13280 + }, + { + "epoch": 0.6, + "learning_rate": 7.934086885469155e-06, + "loss": 0.7354, + "step": 13290 + }, + { + "epoch": 0.6, + "learning_rate": 7.925007944073721e-06, + "loss": 0.6856, + "step": 13300 + }, + { + "epoch": 0.6, + "learning_rate": 7.915929002678289e-06, + "loss": 0.6242, + "step": 13310 + }, + { + "epoch": 0.6, + "learning_rate": 7.906850061282854e-06, + "loss": 0.6432, + "step": 13320 + }, + { + "epoch": 0.61, + "learning_rate": 7.897771119887422e-06, + "loss": 0.6941, + "step": 13330 + }, + { + "epoch": 0.61, + "learning_rate": 7.88869217849199e-06, + "loss": 0.7207, + "step": 13340 + }, + { + "epoch": 0.61, + "learning_rate": 7.879613237096555e-06, + "loss": 0.6047, + "step": 13350 + }, + { + "epoch": 0.61, + "learning_rate": 7.870534295701123e-06, + "loss": 0.6664, + "step": 13360 + }, + { + "epoch": 0.61, + "learning_rate": 7.861455354305689e-06, + "loss": 0.5578, + "step": 13370 + }, + { + "epoch": 0.61, + "learning_rate": 7.852376412910255e-06, + "loss": 0.6244, + "step": 13380 + }, + { + "epoch": 0.61, + "learning_rate": 7.843297471514822e-06, + "loss": 0.7566, + "step": 13390 + }, + { + "epoch": 0.61, + "learning_rate": 7.834218530119388e-06, + "loss": 0.666, + "step": 13400 + }, + { + "epoch": 0.61, + "learning_rate": 7.825139588723956e-06, + "loss": 0.5805, + "step": 13410 + }, + { + "epoch": 0.61, + "learning_rate": 7.816060647328521e-06, + "loss": 0.6395, + "step": 13420 + }, + { + "epoch": 0.61, + "learning_rate": 7.806981705933089e-06, + "loss": 0.7125, + "step": 13430 + }, + { + "epoch": 0.61, + "learning_rate": 7.797902764537656e-06, + "loss": 0.702, + "step": 13440 + }, + { + "epoch": 0.61, + "learning_rate": 7.788823823142222e-06, + "loss": 0.6018, + "step": 13450 + }, + { + "epoch": 0.61, + "learning_rate": 7.77974488174679e-06, + "loss": 0.7211, + "step": 13460 + }, + { + "epoch": 0.61, + "learning_rate": 7.770665940351356e-06, + "loss": 0.6887, + "step": 13470 + }, + { + "epoch": 0.61, + "learning_rate": 7.761586998955922e-06, + "loss": 0.666, + "step": 13480 + }, + { + "epoch": 0.61, + "learning_rate": 7.752508057560489e-06, + "loss": 0.6154, + "step": 13490 + }, + { + "epoch": 0.61, + "learning_rate": 7.743429116165055e-06, + "loss": 0.7082, + "step": 13500 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.6262675036214389, + "eval_loss": 0.66704922914505, + "eval_runtime": 72.8876, + "eval_samples_per_second": 56.827, + "eval_steps_per_second": 14.214, + "step": 13500 + }, + { + "epoch": 0.61, + "learning_rate": 7.734350174769623e-06, + "loss": 0.6029, + "step": 13510 + }, + { + "epoch": 0.61, + "learning_rate": 7.72527123337419e-06, + "loss": 0.6395, + "step": 13520 + }, + { + "epoch": 0.61, + "learning_rate": 7.716192291978756e-06, + "loss": 0.6648, + "step": 13530 + }, + { + "epoch": 0.61, + "learning_rate": 7.707113350583323e-06, + "loss": 0.7348, + "step": 13540 + }, + { + "epoch": 0.62, + "learning_rate": 7.69803440918789e-06, + "loss": 0.583, + "step": 13550 + }, + { + "epoch": 0.62, + "learning_rate": 7.688955467792457e-06, + "loss": 0.707, + "step": 13560 + }, + { + "epoch": 0.62, + "learning_rate": 7.679876526397023e-06, + "loss": 0.625, + "step": 13570 + }, + { + "epoch": 0.62, + "learning_rate": 7.670797585001589e-06, + "loss": 0.6125, + "step": 13580 + }, + { + "epoch": 0.62, + "learning_rate": 7.661718643606156e-06, + "loss": 0.5121, + "step": 13590 + }, + { + "epoch": 0.62, + "learning_rate": 7.652639702210722e-06, + "loss": 0.7041, + "step": 13600 + }, + { + "epoch": 0.62, + "learning_rate": 7.64356076081529e-06, + "loss": 0.6555, + "step": 13610 + }, + { + "epoch": 0.62, + "learning_rate": 7.634481819419857e-06, + "loss": 0.5404, + "step": 13620 + }, + { + "epoch": 0.62, + "learning_rate": 7.625402878024423e-06, + "loss": 0.7129, + "step": 13630 + }, + { + "epoch": 0.62, + "learning_rate": 7.6163239366289896e-06, + "loss": 0.7699, + "step": 13640 + }, + { + "epoch": 0.62, + "learning_rate": 7.607244995233556e-06, + "loss": 0.6877, + "step": 13650 + }, + { + "epoch": 0.62, + "learning_rate": 7.598166053838123e-06, + "loss": 0.735, + "step": 13660 + }, + { + "epoch": 0.62, + "learning_rate": 7.5890871124426905e-06, + "loss": 0.7822, + "step": 13670 + }, + { + "epoch": 0.62, + "learning_rate": 7.580008171047256e-06, + "loss": 0.6674, + "step": 13680 + }, + { + "epoch": 0.62, + "learning_rate": 7.570929229651823e-06, + "loss": 0.6441, + "step": 13690 + }, + { + "epoch": 0.62, + "learning_rate": 7.561850288256391e-06, + "loss": 0.643, + "step": 13700 + }, + { + "epoch": 0.62, + "learning_rate": 7.5527713468609564e-06, + "loss": 0.6641, + "step": 13710 + }, + { + "epoch": 0.62, + "learning_rate": 7.543692405465524e-06, + "loss": 0.6915, + "step": 13720 + }, + { + "epoch": 0.62, + "learning_rate": 7.53461346407009e-06, + "loss": 0.7867, + "step": 13730 + }, + { + "epoch": 0.62, + "learning_rate": 7.5255345226746565e-06, + "loss": 0.7236, + "step": 13740 + }, + { + "epoch": 0.62, + "learning_rate": 7.516455581279224e-06, + "loss": 0.7305, + "step": 13750 + }, + { + "epoch": 0.62, + "learning_rate": 7.50737663988379e-06, + "loss": 0.6984, + "step": 13760 + }, + { + "epoch": 0.63, + "learning_rate": 7.4982976984883575e-06, + "loss": 0.7041, + "step": 13770 + }, + { + "epoch": 0.63, + "learning_rate": 7.489218757092923e-06, + "loss": 0.6264, + "step": 13780 + }, + { + "epoch": 0.63, + "learning_rate": 7.48013981569749e-06, + "loss": 0.6533, + "step": 13790 + }, + { + "epoch": 0.63, + "learning_rate": 7.4710608743020575e-06, + "loss": 0.7535, + "step": 13800 + }, + { + "epoch": 0.63, + "learning_rate": 7.461981932906623e-06, + "loss": 0.7496, + "step": 13810 + }, + { + "epoch": 0.63, + "learning_rate": 7.452902991511191e-06, + "loss": 0.7844, + "step": 13820 + }, + { + "epoch": 0.63, + "learning_rate": 7.443824050115757e-06, + "loss": 0.7195, + "step": 13830 + }, + { + "epoch": 0.63, + "learning_rate": 7.4347451087203235e-06, + "loss": 0.7344, + "step": 13840 + }, + { + "epoch": 0.63, + "learning_rate": 7.425666167324891e-06, + "loss": 0.623, + "step": 13850 + }, + { + "epoch": 0.63, + "learning_rate": 7.416587225929457e-06, + "loss": 0.6908, + "step": 13860 + }, + { + "epoch": 0.63, + "learning_rate": 7.407508284534024e-06, + "loss": 0.6525, + "step": 13870 + }, + { + "epoch": 0.63, + "learning_rate": 7.398429343138591e-06, + "loss": 0.6301, + "step": 13880 + }, + { + "epoch": 0.63, + "learning_rate": 7.389350401743157e-06, + "loss": 0.7547, + "step": 13890 + }, + { + "epoch": 0.63, + "learning_rate": 7.3802714603477245e-06, + "loss": 0.6453, + "step": 13900 + }, + { + "epoch": 0.63, + "learning_rate": 7.37119251895229e-06, + "loss": 0.8059, + "step": 13910 + }, + { + "epoch": 0.63, + "learning_rate": 7.362113577556858e-06, + "loss": 0.6408, + "step": 13920 + }, + { + "epoch": 0.63, + "learning_rate": 7.353034636161425e-06, + "loss": 0.6986, + "step": 13930 + }, + { + "epoch": 0.63, + "learning_rate": 7.3439556947659904e-06, + "loss": 0.5945, + "step": 13940 + }, + { + "epoch": 0.63, + "learning_rate": 7.334876753370558e-06, + "loss": 0.6809, + "step": 13950 + }, + { + "epoch": 0.63, + "learning_rate": 7.325797811975124e-06, + "loss": 0.7059, + "step": 13960 + }, + { + "epoch": 0.63, + "learning_rate": 7.316718870579691e-06, + "loss": 0.6023, + "step": 13970 + }, + { + "epoch": 0.63, + "learning_rate": 7.307639929184258e-06, + "loss": 0.5889, + "step": 13980 + }, + { + "epoch": 0.64, + "learning_rate": 7.298560987788824e-06, + "loss": 0.6645, + "step": 13990 + }, + { + "epoch": 0.64, + "learning_rate": 7.2894820463933915e-06, + "loss": 0.5768, + "step": 14000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.6269917914051183, + "eval_loss": 0.6619028449058533, + "eval_runtime": 73.4725, + "eval_samples_per_second": 56.375, + "eval_steps_per_second": 14.101, + "step": 14000 + }, + { + "epoch": 0.64, + "learning_rate": 7.280403104997957e-06, + "loss": 0.6783, + "step": 14010 + }, + { + "epoch": 0.64, + "learning_rate": 7.271324163602525e-06, + "loss": 0.59, + "step": 14020 + }, + { + "epoch": 0.64, + "learning_rate": 7.2622452222070916e-06, + "loss": 0.682, + "step": 14030 + }, + { + "epoch": 0.64, + "learning_rate": 7.253166280811657e-06, + "loss": 0.6873, + "step": 14040 + }, + { + "epoch": 0.64, + "learning_rate": 7.244087339416225e-06, + "loss": 0.7082, + "step": 14050 + }, + { + "epoch": 0.64, + "learning_rate": 7.235008398020792e-06, + "loss": 0.8027, + "step": 14060 + }, + { + "epoch": 0.64, + "learning_rate": 7.225929456625358e-06, + "loss": 0.6777, + "step": 14070 + }, + { + "epoch": 0.64, + "learning_rate": 7.216850515229925e-06, + "loss": 0.6312, + "step": 14080 + }, + { + "epoch": 0.64, + "learning_rate": 7.207771573834491e-06, + "loss": 0.6381, + "step": 14090 + }, + { + "epoch": 0.64, + "learning_rate": 7.1986926324390584e-06, + "loss": 0.5955, + "step": 14100 + }, + { + "epoch": 0.64, + "learning_rate": 7.189613691043625e-06, + "loss": 0.6518, + "step": 14110 + }, + { + "epoch": 0.64, + "learning_rate": 7.180534749648192e-06, + "loss": 0.5652, + "step": 14120 + }, + { + "epoch": 0.64, + "learning_rate": 7.1714558082527585e-06, + "loss": 0.7027, + "step": 14130 + }, + { + "epoch": 0.64, + "learning_rate": 7.162376866857324e-06, + "loss": 0.615, + "step": 14140 + }, + { + "epoch": 0.64, + "learning_rate": 7.153297925461892e-06, + "loss": 0.568, + "step": 14150 + }, + { + "epoch": 0.64, + "learning_rate": 7.144218984066459e-06, + "loss": 0.6701, + "step": 14160 + }, + { + "epoch": 0.64, + "learning_rate": 7.135140042671025e-06, + "loss": 0.7063, + "step": 14170 + }, + { + "epoch": 0.64, + "learning_rate": 7.126061101275592e-06, + "loss": 0.6428, + "step": 14180 + }, + { + "epoch": 0.64, + "learning_rate": 7.116982159880158e-06, + "loss": 0.6539, + "step": 14190 + }, + { + "epoch": 0.64, + "learning_rate": 7.107903218484725e-06, + "loss": 0.7867, + "step": 14200 + }, + { + "epoch": 0.65, + "learning_rate": 7.098824277089292e-06, + "loss": 0.6078, + "step": 14210 + }, + { + "epoch": 0.65, + "learning_rate": 7.089745335693859e-06, + "loss": 0.5559, + "step": 14220 + }, + { + "epoch": 0.65, + "learning_rate": 7.0806663942984255e-06, + "loss": 0.5851, + "step": 14230 + }, + { + "epoch": 0.65, + "learning_rate": 7.071587452902993e-06, + "loss": 0.5799, + "step": 14240 + }, + { + "epoch": 0.65, + "learning_rate": 7.062508511507559e-06, + "loss": 0.5656, + "step": 14250 + }, + { + "epoch": 0.65, + "learning_rate": 7.0534295701121256e-06, + "loss": 0.5715, + "step": 14260 + }, + { + "epoch": 0.65, + "learning_rate": 7.044350628716692e-06, + "loss": 0.7535, + "step": 14270 + }, + { + "epoch": 0.65, + "learning_rate": 7.035271687321259e-06, + "loss": 0.607, + "step": 14280 + }, + { + "epoch": 0.65, + "learning_rate": 7.0261927459258265e-06, + "loss": 0.6488, + "step": 14290 + }, + { + "epoch": 0.65, + "learning_rate": 7.017113804530392e-06, + "loss": 0.5721, + "step": 14300 + }, + { + "epoch": 0.65, + "learning_rate": 7.008034863134959e-06, + "loss": 0.7064, + "step": 14310 + }, + { + "epoch": 0.65, + "learning_rate": 6.998955921739526e-06, + "loss": 0.5836, + "step": 14320 + }, + { + "epoch": 0.65, + "learning_rate": 6.9898769803440924e-06, + "loss": 0.624, + "step": 14330 + }, + { + "epoch": 0.65, + "learning_rate": 6.98079803894866e-06, + "loss": 0.5609, + "step": 14340 + }, + { + "epoch": 0.65, + "learning_rate": 6.971719097553226e-06, + "loss": 0.6859, + "step": 14350 + }, + { + "epoch": 0.65, + "learning_rate": 6.9626401561577925e-06, + "loss": 0.652, + "step": 14360 + }, + { + "epoch": 0.65, + "learning_rate": 6.953561214762358e-06, + "loss": 0.8596, + "step": 14370 + }, + { + "epoch": 0.65, + "learning_rate": 6.944482273366926e-06, + "loss": 0.641, + "step": 14380 + }, + { + "epoch": 0.65, + "learning_rate": 6.9354033319714935e-06, + "loss": 0.7641, + "step": 14390 + }, + { + "epoch": 0.65, + "learning_rate": 6.926324390576059e-06, + "loss": 0.7662, + "step": 14400 + }, + { + "epoch": 0.65, + "learning_rate": 6.917245449180626e-06, + "loss": 0.698, + "step": 14410 + }, + { + "epoch": 0.65, + "learning_rate": 6.908166507785192e-06, + "loss": 0.6398, + "step": 14420 + }, + { + "epoch": 0.66, + "learning_rate": 6.899087566389759e-06, + "loss": 0.7016, + "step": 14430 + }, + { + "epoch": 0.66, + "learning_rate": 6.890008624994327e-06, + "loss": 0.6867, + "step": 14440 + }, + { + "epoch": 0.66, + "learning_rate": 6.880929683598893e-06, + "loss": 0.6895, + "step": 14450 + }, + { + "epoch": 0.66, + "learning_rate": 6.8718507422034595e-06, + "loss": 0.6836, + "step": 14460 + }, + { + "epoch": 0.66, + "learning_rate": 6.862771800808027e-06, + "loss": 0.6588, + "step": 14470 + }, + { + "epoch": 0.66, + "learning_rate": 6.853692859412593e-06, + "loss": 0.7346, + "step": 14480 + }, + { + "epoch": 0.66, + "learning_rate": 6.84461391801716e-06, + "loss": 0.6236, + "step": 14490 + }, + { + "epoch": 0.66, + "learning_rate": 6.835534976621726e-06, + "loss": 0.6082, + "step": 14500 + }, + { + "epoch": 0.66, + "eval_accuracy": 0.6154031868662482, + "eval_loss": 0.6647462248802185, + "eval_runtime": 73.4637, + "eval_samples_per_second": 56.382, + "eval_steps_per_second": 14.102, + "step": 14500 + }, + { + "epoch": 0.66, + "learning_rate": 6.826456035226293e-06, + "loss": 0.7277, + "step": 14510 + }, + { + "epoch": 0.66, + "learning_rate": 6.8173770938308605e-06, + "loss": 0.7398, + "step": 14520 + }, + { + "epoch": 0.66, + "learning_rate": 6.808298152435426e-06, + "loss": 0.702, + "step": 14530 + }, + { + "epoch": 0.66, + "learning_rate": 6.799219211039994e-06, + "loss": 0.6396, + "step": 14540 + }, + { + "epoch": 0.66, + "learning_rate": 6.79014026964456e-06, + "loss": 0.6211, + "step": 14550 + }, + { + "epoch": 0.66, + "learning_rate": 6.7810613282491264e-06, + "loss": 0.716, + "step": 14560 + }, + { + "epoch": 0.66, + "learning_rate": 6.771982386853694e-06, + "loss": 0.6299, + "step": 14570 + }, + { + "epoch": 0.66, + "learning_rate": 6.76290344545826e-06, + "loss": 0.6096, + "step": 14580 + }, + { + "epoch": 0.66, + "learning_rate": 6.753824504062827e-06, + "loss": 0.6121, + "step": 14590 + }, + { + "epoch": 0.66, + "learning_rate": 6.744745562667393e-06, + "loss": 0.65, + "step": 14600 + }, + { + "epoch": 0.66, + "learning_rate": 6.73566662127196e-06, + "loss": 0.7004, + "step": 14610 + }, + { + "epoch": 0.66, + "learning_rate": 6.7265876798765275e-06, + "loss": 0.5912, + "step": 14620 + }, + { + "epoch": 0.66, + "learning_rate": 6.717508738481093e-06, + "loss": 0.6816, + "step": 14630 + }, + { + "epoch": 0.66, + "learning_rate": 6.708429797085661e-06, + "loss": 0.7867, + "step": 14640 + }, + { + "epoch": 0.67, + "learning_rate": 6.6993508556902276e-06, + "loss": 0.6977, + "step": 14650 + }, + { + "epoch": 0.67, + "learning_rate": 6.690271914294793e-06, + "loss": 0.7098, + "step": 14660 + }, + { + "epoch": 0.67, + "learning_rate": 6.681192972899361e-06, + "loss": 0.749, + "step": 14670 + }, + { + "epoch": 0.67, + "learning_rate": 6.672114031503927e-06, + "loss": 0.5707, + "step": 14680 + }, + { + "epoch": 0.67, + "learning_rate": 6.663035090108494e-06, + "loss": 0.7465, + "step": 14690 + }, + { + "epoch": 0.67, + "learning_rate": 6.653956148713061e-06, + "loss": 0.6639, + "step": 14700 + }, + { + "epoch": 0.67, + "learning_rate": 6.644877207317627e-06, + "loss": 0.7758, + "step": 14710 + }, + { + "epoch": 0.67, + "learning_rate": 6.6357982659221944e-06, + "loss": 0.6439, + "step": 14720 + }, + { + "epoch": 0.67, + "learning_rate": 6.62671932452676e-06, + "loss": 0.5809, + "step": 14730 + }, + { + "epoch": 0.67, + "learning_rate": 6.617640383131327e-06, + "loss": 0.6887, + "step": 14740 + }, + { + "epoch": 0.67, + "learning_rate": 6.6085614417358945e-06, + "loss": 0.6861, + "step": 14750 + }, + { + "epoch": 0.67, + "learning_rate": 6.59948250034046e-06, + "loss": 0.6762, + "step": 14760 + }, + { + "epoch": 0.67, + "learning_rate": 6.590403558945028e-06, + "loss": 0.698, + "step": 14770 + }, + { + "epoch": 0.67, + "learning_rate": 6.581324617549594e-06, + "loss": 0.7, + "step": 14780 + }, + { + "epoch": 0.67, + "learning_rate": 6.5722456761541605e-06, + "loss": 0.652, + "step": 14790 + }, + { + "epoch": 0.67, + "learning_rate": 6.563166734758728e-06, + "loss": 0.6787, + "step": 14800 + }, + { + "epoch": 0.67, + "learning_rate": 6.554087793363294e-06, + "loss": 0.6215, + "step": 14810 + }, + { + "epoch": 0.67, + "learning_rate": 6.545008851967861e-06, + "loss": 0.8277, + "step": 14820 + }, + { + "epoch": 0.67, + "learning_rate": 6.535929910572428e-06, + "loss": 0.6615, + "step": 14830 + }, + { + "epoch": 0.67, + "learning_rate": 6.526850969176994e-06, + "loss": 0.6617, + "step": 14840 + }, + { + "epoch": 0.67, + "learning_rate": 6.5177720277815615e-06, + "loss": 0.6766, + "step": 14850 + }, + { + "epoch": 0.67, + "learning_rate": 6.508693086386127e-06, + "loss": 0.7777, + "step": 14860 + }, + { + "epoch": 0.68, + "learning_rate": 6.499614144990695e-06, + "loss": 0.7141, + "step": 14870 + }, + { + "epoch": 0.68, + "learning_rate": 6.4905352035952616e-06, + "loss": 0.6637, + "step": 14880 + }, + { + "epoch": 0.68, + "learning_rate": 6.481456262199827e-06, + "loss": 0.6723, + "step": 14890 + }, + { + "epoch": 0.68, + "learning_rate": 6.472377320804395e-06, + "loss": 0.6975, + "step": 14900 + }, + { + "epoch": 0.68, + "learning_rate": 6.463298379408961e-06, + "loss": 0.7087, + "step": 14910 + }, + { + "epoch": 0.68, + "learning_rate": 6.454219438013528e-06, + "loss": 0.6736, + "step": 14920 + }, + { + "epoch": 0.68, + "learning_rate": 6.445140496618095e-06, + "loss": 0.6164, + "step": 14930 + }, + { + "epoch": 0.68, + "learning_rate": 6.436061555222661e-06, + "loss": 0.6373, + "step": 14940 + }, + { + "epoch": 0.68, + "learning_rate": 6.4269826138272284e-06, + "loss": 0.707, + "step": 14950 + }, + { + "epoch": 0.68, + "learning_rate": 6.417903672431794e-06, + "loss": 0.6229, + "step": 14960 + }, + { + "epoch": 0.68, + "learning_rate": 6.408824731036362e-06, + "loss": 0.734, + "step": 14970 + }, + { + "epoch": 0.68, + "learning_rate": 6.3997457896409285e-06, + "loss": 0.6488, + "step": 14980 + }, + { + "epoch": 0.68, + "learning_rate": 6.390666848245494e-06, + "loss": 0.6781, + "step": 14990 + }, + { + "epoch": 0.68, + "learning_rate": 6.381587906850062e-06, + "loss": 0.616, + "step": 15000 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.623128923225495, + "eval_loss": 0.6619547009468079, + "eval_runtime": 73.1853, + "eval_samples_per_second": 56.596, + "eval_steps_per_second": 14.156, + "step": 15000 + }, + { + "epoch": 0.68, + "learning_rate": 6.3725089654546295e-06, + "loss": 0.6363, + "step": 15010 + }, + { + "epoch": 0.68, + "learning_rate": 6.363430024059195e-06, + "loss": 0.7072, + "step": 15020 + }, + { + "epoch": 0.68, + "learning_rate": 6.354351082663762e-06, + "loss": 0.6193, + "step": 15030 + }, + { + "epoch": 0.68, + "learning_rate": 6.345272141268328e-06, + "loss": 0.6348, + "step": 15040 + }, + { + "epoch": 0.68, + "learning_rate": 6.336193199872895e-06, + "loss": 0.6953, + "step": 15050 + }, + { + "epoch": 0.68, + "learning_rate": 6.327114258477463e-06, + "loss": 0.5598, + "step": 15060 + }, + { + "epoch": 0.68, + "learning_rate": 6.318035317082029e-06, + "loss": 0.6547, + "step": 15070 + }, + { + "epoch": 0.68, + "learning_rate": 6.3089563756865955e-06, + "loss": 0.6287, + "step": 15080 + }, + { + "epoch": 0.69, + "learning_rate": 6.299877434291161e-06, + "loss": 0.5846, + "step": 15090 + }, + { + "epoch": 0.69, + "learning_rate": 6.290798492895729e-06, + "loss": 0.7217, + "step": 15100 + }, + { + "epoch": 0.69, + "learning_rate": 6.281719551500296e-06, + "loss": 0.7309, + "step": 15110 + }, + { + "epoch": 0.69, + "learning_rate": 6.272640610104862e-06, + "loss": 0.6762, + "step": 15120 + }, + { + "epoch": 0.69, + "learning_rate": 6.263561668709429e-06, + "loss": 0.7738, + "step": 15130 + }, + { + "epoch": 0.69, + "learning_rate": 6.254482727313995e-06, + "loss": 0.6906, + "step": 15140 + }, + { + "epoch": 0.69, + "learning_rate": 6.245403785918562e-06, + "loss": 0.6582, + "step": 15150 + }, + { + "epoch": 0.69, + "learning_rate": 6.236324844523129e-06, + "loss": 0.6228, + "step": 15160 + }, + { + "epoch": 0.69, + "learning_rate": 6.227245903127696e-06, + "loss": 0.7645, + "step": 15170 + }, + { + "epoch": 0.69, + "learning_rate": 6.2181669617322625e-06, + "loss": 0.6684, + "step": 15180 + }, + { + "epoch": 0.69, + "learning_rate": 6.20908802033683e-06, + "loss": 0.7598, + "step": 15190 + }, + { + "epoch": 0.69, + "learning_rate": 6.200009078941396e-06, + "loss": 0.6254, + "step": 15200 + }, + { + "epoch": 0.69, + "learning_rate": 6.1909301375459625e-06, + "loss": 0.6281, + "step": 15210 + }, + { + "epoch": 0.69, + "learning_rate": 6.181851196150529e-06, + "loss": 0.6301, + "step": 15220 + }, + { + "epoch": 0.69, + "learning_rate": 6.172772254755096e-06, + "loss": 0.6805, + "step": 15230 + }, + { + "epoch": 0.69, + "learning_rate": 6.1636933133596635e-06, + "loss": 0.7369, + "step": 15240 + }, + { + "epoch": 0.69, + "learning_rate": 6.154614371964229e-06, + "loss": 0.6443, + "step": 15250 + }, + { + "epoch": 0.69, + "learning_rate": 6.145535430568796e-06, + "loss": 0.6247, + "step": 15260 + }, + { + "epoch": 0.69, + "learning_rate": 6.136456489173363e-06, + "loss": 0.6518, + "step": 15270 + }, + { + "epoch": 0.69, + "learning_rate": 6.127377547777929e-06, + "loss": 0.6061, + "step": 15280 + }, + { + "epoch": 0.69, + "learning_rate": 6.118298606382497e-06, + "loss": 0.6303, + "step": 15290 + }, + { + "epoch": 0.69, + "learning_rate": 6.109219664987063e-06, + "loss": 0.5945, + "step": 15300 + }, + { + "epoch": 0.69, + "learning_rate": 6.1001407235916295e-06, + "loss": 0.5559, + "step": 15310 + }, + { + "epoch": 0.7, + "learning_rate": 6.091061782196196e-06, + "loss": 0.6348, + "step": 15320 + }, + { + "epoch": 0.7, + "learning_rate": 6.081982840800763e-06, + "loss": 0.5775, + "step": 15330 + }, + { + "epoch": 0.7, + "learning_rate": 6.0729038994053304e-06, + "loss": 0.6215, + "step": 15340 + }, + { + "epoch": 0.7, + "learning_rate": 6.063824958009896e-06, + "loss": 0.641, + "step": 15350 + }, + { + "epoch": 0.7, + "learning_rate": 6.054746016614463e-06, + "loss": 0.6947, + "step": 15360 + }, + { + "epoch": 0.7, + "learning_rate": 6.04566707521903e-06, + "loss": 0.698, + "step": 15370 + }, + { + "epoch": 0.7, + "learning_rate": 6.036588133823596e-06, + "loss": 0.7543, + "step": 15380 + }, + { + "epoch": 0.7, + "learning_rate": 6.027509192428164e-06, + "loss": 0.7227, + "step": 15390 + }, + { + "epoch": 0.7, + "learning_rate": 6.01843025103273e-06, + "loss": 0.6172, + "step": 15400 + }, + { + "epoch": 0.7, + "learning_rate": 6.0093513096372965e-06, + "loss": 0.7646, + "step": 15410 + }, + { + "epoch": 0.7, + "learning_rate": 6.000272368241864e-06, + "loss": 0.7711, + "step": 15420 + }, + { + "epoch": 0.7, + "learning_rate": 5.99119342684643e-06, + "loss": 0.6197, + "step": 15430 + }, + { + "epoch": 0.7, + "learning_rate": 5.982114485450997e-06, + "loss": 0.6904, + "step": 15440 + }, + { + "epoch": 0.7, + "learning_rate": 5.973035544055563e-06, + "loss": 0.7703, + "step": 15450 + }, + { + "epoch": 0.7, + "learning_rate": 5.96395660266013e-06, + "loss": 0.607, + "step": 15460 + }, + { + "epoch": 0.7, + "learning_rate": 5.9548776612646975e-06, + "loss": 0.877, + "step": 15470 + }, + { + "epoch": 0.7, + "learning_rate": 5.945798719869263e-06, + "loss": 0.6273, + "step": 15480 + }, + { + "epoch": 0.7, + "learning_rate": 5.936719778473831e-06, + "loss": 0.7051, + "step": 15490 + }, + { + "epoch": 0.7, + "learning_rate": 5.927640837078397e-06, + "loss": 0.7094, + "step": 15500 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.6284403669724771, + "eval_loss": 0.6594442129135132, + "eval_runtime": 73.777, + "eval_samples_per_second": 56.142, + "eval_steps_per_second": 14.042, + "step": 15500 + }, + { + "epoch": 0.7, + "learning_rate": 5.9185618956829634e-06, + "loss": 0.6646, + "step": 15510 + }, + { + "epoch": 0.7, + "learning_rate": 5.909482954287531e-06, + "loss": 0.6553, + "step": 15520 + }, + { + "epoch": 0.7, + "learning_rate": 5.900404012892097e-06, + "loss": 0.7227, + "step": 15530 + }, + { + "epoch": 0.71, + "learning_rate": 5.891325071496664e-06, + "loss": 0.6779, + "step": 15540 + }, + { + "epoch": 0.71, + "learning_rate": 5.88224613010123e-06, + "loss": 0.6443, + "step": 15550 + }, + { + "epoch": 0.71, + "learning_rate": 5.873167188705797e-06, + "loss": 0.7105, + "step": 15560 + }, + { + "epoch": 0.71, + "learning_rate": 5.8640882473103644e-06, + "loss": 0.6504, + "step": 15570 + }, + { + "epoch": 0.71, + "learning_rate": 5.85500930591493e-06, + "loss": 0.5859, + "step": 15580 + }, + { + "epoch": 0.71, + "learning_rate": 5.845930364519498e-06, + "loss": 0.5393, + "step": 15590 + }, + { + "epoch": 0.71, + "learning_rate": 5.8368514231240645e-06, + "loss": 0.6582, + "step": 15600 + }, + { + "epoch": 0.71, + "learning_rate": 5.82777248172863e-06, + "loss": 0.6428, + "step": 15610 + }, + { + "epoch": 0.71, + "learning_rate": 5.818693540333198e-06, + "loss": 0.6365, + "step": 15620 + }, + { + "epoch": 0.71, + "learning_rate": 5.809614598937764e-06, + "loss": 0.6689, + "step": 15630 + }, + { + "epoch": 0.71, + "learning_rate": 5.800535657542331e-06, + "loss": 0.6746, + "step": 15640 + }, + { + "epoch": 0.71, + "learning_rate": 5.791456716146898e-06, + "loss": 0.7037, + "step": 15650 + }, + { + "epoch": 0.71, + "learning_rate": 5.782377774751464e-06, + "loss": 0.6582, + "step": 15660 + }, + { + "epoch": 0.71, + "learning_rate": 5.773298833356031e-06, + "loss": 0.5789, + "step": 15670 + }, + { + "epoch": 0.71, + "learning_rate": 5.764219891960597e-06, + "loss": 0.6115, + "step": 15680 + }, + { + "epoch": 0.71, + "learning_rate": 5.755140950565165e-06, + "loss": 0.6639, + "step": 15690 + }, + { + "epoch": 0.71, + "learning_rate": 5.7460620091697315e-06, + "loss": 0.7234, + "step": 15700 + }, + { + "epoch": 0.71, + "learning_rate": 5.736983067774297e-06, + "loss": 0.6412, + "step": 15710 + }, + { + "epoch": 0.71, + "learning_rate": 5.727904126378865e-06, + "loss": 0.6016, + "step": 15720 + }, + { + "epoch": 0.71, + "learning_rate": 5.718825184983431e-06, + "loss": 0.6486, + "step": 15730 + }, + { + "epoch": 0.71, + "learning_rate": 5.709746243587998e-06, + "loss": 0.6322, + "step": 15740 + }, + { + "epoch": 0.71, + "learning_rate": 5.700667302192565e-06, + "loss": 0.649, + "step": 15750 + }, + { + "epoch": 0.72, + "learning_rate": 5.691588360797131e-06, + "loss": 0.65, + "step": 15760 + }, + { + "epoch": 0.72, + "learning_rate": 5.682509419401698e-06, + "loss": 0.6649, + "step": 15770 + }, + { + "epoch": 0.72, + "learning_rate": 5.673430478006265e-06, + "loss": 0.6457, + "step": 15780 + }, + { + "epoch": 0.72, + "learning_rate": 5.664351536610832e-06, + "loss": 0.6773, + "step": 15790 + }, + { + "epoch": 0.72, + "learning_rate": 5.6552725952153985e-06, + "loss": 0.5795, + "step": 15800 + }, + { + "epoch": 0.72, + "learning_rate": 5.646193653819964e-06, + "loss": 0.5668, + "step": 15810 + }, + { + "epoch": 0.72, + "learning_rate": 5.637114712424532e-06, + "loss": 0.5938, + "step": 15820 + }, + { + "epoch": 0.72, + "learning_rate": 5.6280357710290985e-06, + "loss": 0.7176, + "step": 15830 + }, + { + "epoch": 0.72, + "learning_rate": 5.618956829633665e-06, + "loss": 0.6992, + "step": 15840 + }, + { + "epoch": 0.72, + "learning_rate": 5.609877888238232e-06, + "loss": 0.6344, + "step": 15850 + }, + { + "epoch": 0.72, + "learning_rate": 5.600798946842798e-06, + "loss": 0.5469, + "step": 15860 + }, + { + "epoch": 0.72, + "learning_rate": 5.591720005447365e-06, + "loss": 0.6225, + "step": 15870 + }, + { + "epoch": 0.72, + "learning_rate": 5.582641064051932e-06, + "loss": 0.6426, + "step": 15880 + }, + { + "epoch": 0.72, + "learning_rate": 5.573562122656499e-06, + "loss": 0.6982, + "step": 15890 + }, + { + "epoch": 0.72, + "learning_rate": 5.564483181261065e-06, + "loss": 0.6342, + "step": 15900 + }, + { + "epoch": 0.72, + "learning_rate": 5.555404239865631e-06, + "loss": 0.6494, + "step": 15910 + }, + { + "epoch": 0.72, + "learning_rate": 5.546325298470199e-06, + "loss": 0.8711, + "step": 15920 + }, + { + "epoch": 0.72, + "learning_rate": 5.5372463570747655e-06, + "loss": 0.6488, + "step": 15930 + }, + { + "epoch": 0.72, + "learning_rate": 5.528167415679332e-06, + "loss": 0.6447, + "step": 15940 + }, + { + "epoch": 0.72, + "learning_rate": 5.519088474283899e-06, + "loss": 0.6604, + "step": 15950 + }, + { + "epoch": 0.72, + "learning_rate": 5.5100095328884664e-06, + "loss": 0.6629, + "step": 15960 + }, + { + "epoch": 0.72, + "learning_rate": 5.500930591493032e-06, + "loss": 0.7656, + "step": 15970 + }, + { + "epoch": 0.73, + "learning_rate": 5.491851650097599e-06, + "loss": 0.6625, + "step": 15980 + }, + { + "epoch": 0.73, + "learning_rate": 5.482772708702166e-06, + "loss": 0.6834, + "step": 15990 + }, + { + "epoch": 0.73, + "learning_rate": 5.473693767306732e-06, + "loss": 0.5994, + "step": 16000 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.6253017865765331, + "eval_loss": 0.6599798798561096, + "eval_runtime": 73.4934, + "eval_samples_per_second": 56.359, + "eval_steps_per_second": 14.097, + "step": 16000 + }, + { + "epoch": 0.73, + "learning_rate": 5.4646148259113e-06, + "loss": 0.4986, + "step": 16010 + }, + { + "epoch": 0.73, + "learning_rate": 5.455535884515866e-06, + "loss": 0.6568, + "step": 16020 + }, + { + "epoch": 0.73, + "learning_rate": 5.4464569431204325e-06, + "loss": 0.7852, + "step": 16030 + }, + { + "epoch": 0.73, + "learning_rate": 5.437378001724999e-06, + "loss": 0.6438, + "step": 16040 + }, + { + "epoch": 0.73, + "learning_rate": 5.428299060329566e-06, + "loss": 0.6207, + "step": 16050 + }, + { + "epoch": 0.73, + "learning_rate": 5.419220118934133e-06, + "loss": 0.7109, + "step": 16060 + }, + { + "epoch": 0.73, + "learning_rate": 5.410141177538699e-06, + "loss": 0.5861, + "step": 16070 + }, + { + "epoch": 0.73, + "learning_rate": 5.401062236143266e-06, + "loss": 0.698, + "step": 16080 + }, + { + "epoch": 0.73, + "learning_rate": 5.391983294747833e-06, + "loss": 0.6994, + "step": 16090 + }, + { + "epoch": 0.73, + "learning_rate": 5.382904353352399e-06, + "loss": 0.7209, + "step": 16100 + }, + { + "epoch": 0.73, + "learning_rate": 5.373825411956967e-06, + "loss": 0.6584, + "step": 16110 + }, + { + "epoch": 0.73, + "learning_rate": 5.364746470561533e-06, + "loss": 0.6723, + "step": 16120 + }, + { + "epoch": 0.73, + "learning_rate": 5.3556675291660994e-06, + "loss": 0.609, + "step": 16130 + }, + { + "epoch": 0.73, + "learning_rate": 5.346588587770667e-06, + "loss": 0.6607, + "step": 16140 + }, + { + "epoch": 0.73, + "learning_rate": 5.337509646375233e-06, + "loss": 0.6723, + "step": 16150 + }, + { + "epoch": 0.73, + "learning_rate": 5.3284307049798e-06, + "loss": 0.6447, + "step": 16160 + }, + { + "epoch": 0.73, + "learning_rate": 5.319351763584366e-06, + "loss": 0.732, + "step": 16170 + }, + { + "epoch": 0.73, + "learning_rate": 5.310272822188933e-06, + "loss": 0.6377, + "step": 16180 + }, + { + "epoch": 0.73, + "learning_rate": 5.3011938807935005e-06, + "loss": 0.6717, + "step": 16190 + }, + { + "epoch": 0.74, + "learning_rate": 5.292114939398066e-06, + "loss": 0.6777, + "step": 16200 + }, + { + "epoch": 0.74, + "learning_rate": 5.283035998002634e-06, + "loss": 0.7375, + "step": 16210 + }, + { + "epoch": 0.74, + "learning_rate": 5.2739570566072e-06, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 0.74, + "learning_rate": 5.264878115211766e-06, + "loss": 0.8133, + "step": 16230 + }, + { + "epoch": 0.74, + "learning_rate": 5.255799173816334e-06, + "loss": 0.6363, + "step": 16240 + }, + { + "epoch": 0.74, + "learning_rate": 5.2467202324209e-06, + "loss": 0.7176, + "step": 16250 + }, + { + "epoch": 0.74, + "learning_rate": 5.237641291025467e-06, + "loss": 0.7248, + "step": 16260 + }, + { + "epoch": 0.74, + "learning_rate": 5.228562349630033e-06, + "loss": 0.7535, + "step": 16270 + }, + { + "epoch": 0.74, + "learning_rate": 5.2194834082346e-06, + "loss": 0.6496, + "step": 16280 + }, + { + "epoch": 0.74, + "learning_rate": 5.210404466839167e-06, + "loss": 0.6551, + "step": 16290 + }, + { + "epoch": 0.74, + "learning_rate": 5.201325525443733e-06, + "loss": 0.7176, + "step": 16300 + }, + { + "epoch": 0.74, + "learning_rate": 5.192246584048301e-06, + "loss": 0.7488, + "step": 16310 + }, + { + "epoch": 0.74, + "learning_rate": 5.1831676426528675e-06, + "loss": 0.5922, + "step": 16320 + }, + { + "epoch": 0.74, + "learning_rate": 5.174088701257433e-06, + "loss": 0.6285, + "step": 16330 + }, + { + "epoch": 0.74, + "learning_rate": 5.165009759862001e-06, + "loss": 0.7195, + "step": 16340 + }, + { + "epoch": 0.74, + "learning_rate": 5.155930818466567e-06, + "loss": 0.5939, + "step": 16350 + }, + { + "epoch": 0.74, + "learning_rate": 5.146851877071134e-06, + "loss": 0.6443, + "step": 16360 + }, + { + "epoch": 0.74, + "learning_rate": 5.137772935675701e-06, + "loss": 0.7504, + "step": 16370 + }, + { + "epoch": 0.74, + "learning_rate": 5.128693994280267e-06, + "loss": 0.5725, + "step": 16380 + }, + { + "epoch": 0.74, + "learning_rate": 5.119615052884834e-06, + "loss": 0.6207, + "step": 16390 + }, + { + "epoch": 0.74, + "learning_rate": 5.1105361114894e-06, + "loss": 0.6525, + "step": 16400 + }, + { + "epoch": 0.74, + "learning_rate": 5.101457170093968e-06, + "loss": 0.6426, + "step": 16410 + }, + { + "epoch": 0.75, + "learning_rate": 5.0923782286985345e-06, + "loss": 0.6492, + "step": 16420 + }, + { + "epoch": 0.75, + "learning_rate": 5.0832992873031e-06, + "loss": 0.6195, + "step": 16430 + }, + { + "epoch": 0.75, + "learning_rate": 5.074220345907668e-06, + "loss": 0.7227, + "step": 16440 + }, + { + "epoch": 0.75, + "learning_rate": 5.065141404512234e-06, + "loss": 0.6459, + "step": 16450 + }, + { + "epoch": 0.75, + "learning_rate": 5.056062463116801e-06, + "loss": 0.7871, + "step": 16460 + }, + { + "epoch": 0.75, + "learning_rate": 5.046983521721368e-06, + "loss": 0.709, + "step": 16470 + }, + { + "epoch": 0.75, + "learning_rate": 5.037904580325934e-06, + "loss": 0.7148, + "step": 16480 + }, + { + "epoch": 0.75, + "learning_rate": 5.028825638930501e-06, + "loss": 0.7053, + "step": 16490 + }, + { + "epoch": 0.75, + "learning_rate": 5.019746697535067e-06, + "loss": 0.6969, + "step": 16500 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.6332689521970063, + "eval_loss": 0.6564537286758423, + "eval_runtime": 73.2629, + "eval_samples_per_second": 56.536, + "eval_steps_per_second": 14.141, + "step": 16500 + }, + { + "epoch": 0.75, + "learning_rate": 5.010667756139635e-06, + "loss": 0.6316, + "step": 16510 + }, + { + "epoch": 0.75, + "learning_rate": 5.0015888147442014e-06, + "loss": 0.649, + "step": 16520 + }, + { + "epoch": 0.75, + "learning_rate": 4.992509873348768e-06, + "loss": 0.6922, + "step": 16530 + }, + { + "epoch": 0.75, + "learning_rate": 4.983430931953335e-06, + "loss": 0.7539, + "step": 16540 + }, + { + "epoch": 0.75, + "learning_rate": 4.9743519905579015e-06, + "loss": 0.7371, + "step": 16550 + }, + { + "epoch": 0.75, + "learning_rate": 4.965273049162468e-06, + "loss": 0.7238, + "step": 16560 + }, + { + "epoch": 0.75, + "learning_rate": 4.956194107767035e-06, + "loss": 0.6398, + "step": 16570 + }, + { + "epoch": 0.75, + "learning_rate": 4.947115166371602e-06, + "loss": 0.6449, + "step": 16580 + }, + { + "epoch": 0.75, + "learning_rate": 4.938036224976168e-06, + "loss": 0.6453, + "step": 16590 + }, + { + "epoch": 0.75, + "learning_rate": 4.928957283580735e-06, + "loss": 0.6664, + "step": 16600 + }, + { + "epoch": 0.75, + "learning_rate": 4.919878342185302e-06, + "loss": 0.6332, + "step": 16610 + }, + { + "epoch": 0.75, + "learning_rate": 4.910799400789868e-06, + "loss": 0.6787, + "step": 16620 + }, + { + "epoch": 0.75, + "learning_rate": 4.901720459394435e-06, + "loss": 0.6451, + "step": 16630 + }, + { + "epoch": 0.76, + "learning_rate": 4.892641517999002e-06, + "loss": 0.6668, + "step": 16640 + }, + { + "epoch": 0.76, + "learning_rate": 4.8835625766035685e-06, + "loss": 0.7297, + "step": 16650 + }, + { + "epoch": 0.76, + "learning_rate": 4.874483635208135e-06, + "loss": 0.6457, + "step": 16660 + }, + { + "epoch": 0.76, + "learning_rate": 4.865404693812702e-06, + "loss": 0.709, + "step": 16670 + }, + { + "epoch": 0.76, + "learning_rate": 4.8563257524172686e-06, + "loss": 0.6281, + "step": 16680 + }, + { + "epoch": 0.76, + "learning_rate": 4.847246811021835e-06, + "loss": 0.7352, + "step": 16690 + }, + { + "epoch": 0.76, + "learning_rate": 4.838167869626402e-06, + "loss": 0.6623, + "step": 16700 + }, + { + "epoch": 0.76, + "learning_rate": 4.829088928230969e-06, + "loss": 0.6873, + "step": 16710 + }, + { + "epoch": 0.76, + "learning_rate": 4.820009986835535e-06, + "loss": 0.6742, + "step": 16720 + }, + { + "epoch": 0.76, + "learning_rate": 4.810931045440102e-06, + "loss": 0.5553, + "step": 16730 + }, + { + "epoch": 0.76, + "learning_rate": 4.801852104044669e-06, + "loss": 0.652, + "step": 16740 + }, + { + "epoch": 0.76, + "learning_rate": 4.7927731626492354e-06, + "loss": 0.6018, + "step": 16750 + }, + { + "epoch": 0.76, + "learning_rate": 4.783694221253802e-06, + "loss": 0.6379, + "step": 16760 + }, + { + "epoch": 0.76, + "learning_rate": 4.774615279858369e-06, + "loss": 0.6402, + "step": 16770 + }, + { + "epoch": 0.76, + "learning_rate": 4.7655363384629355e-06, + "loss": 0.6189, + "step": 16780 + }, + { + "epoch": 0.76, + "learning_rate": 4.756457397067502e-06, + "loss": 0.7432, + "step": 16790 + }, + { + "epoch": 0.76, + "learning_rate": 4.747378455672069e-06, + "loss": 0.7102, + "step": 16800 + }, + { + "epoch": 0.76, + "learning_rate": 4.738299514276636e-06, + "loss": 0.6535, + "step": 16810 + }, + { + "epoch": 0.76, + "learning_rate": 4.729220572881202e-06, + "loss": 0.6627, + "step": 16820 + }, + { + "epoch": 0.76, + "learning_rate": 4.720141631485769e-06, + "loss": 0.5818, + "step": 16830 + }, + { + "epoch": 0.76, + "learning_rate": 4.711062690090336e-06, + "loss": 0.635, + "step": 16840 + }, + { + "epoch": 0.76, + "learning_rate": 4.701983748694902e-06, + "loss": 0.7068, + "step": 16850 + }, + { + "epoch": 0.77, + "learning_rate": 4.692904807299469e-06, + "loss": 0.6361, + "step": 16860 + }, + { + "epoch": 0.77, + "learning_rate": 4.683825865904036e-06, + "loss": 0.5547, + "step": 16870 + }, + { + "epoch": 0.77, + "learning_rate": 4.6747469245086025e-06, + "loss": 0.5598, + "step": 16880 + }, + { + "epoch": 0.77, + "learning_rate": 4.665667983113169e-06, + "loss": 0.6877, + "step": 16890 + }, + { + "epoch": 0.77, + "learning_rate": 4.656589041717736e-06, + "loss": 0.6459, + "step": 16900 + }, + { + "epoch": 0.77, + "learning_rate": 4.647510100322303e-06, + "loss": 0.575, + "step": 16910 + }, + { + "epoch": 0.77, + "learning_rate": 4.638431158926869e-06, + "loss": 0.6687, + "step": 16920 + }, + { + "epoch": 0.77, + "learning_rate": 4.629352217531437e-06, + "loss": 0.6676, + "step": 16930 + }, + { + "epoch": 0.77, + "learning_rate": 4.620273276136003e-06, + "loss": 0.7098, + "step": 16940 + }, + { + "epoch": 0.77, + "learning_rate": 4.611194334740569e-06, + "loss": 0.7119, + "step": 16950 + }, + { + "epoch": 0.77, + "learning_rate": 4.602115393345136e-06, + "loss": 0.5803, + "step": 16960 + }, + { + "epoch": 0.77, + "learning_rate": 4.593036451949703e-06, + "loss": 0.6615, + "step": 16970 + }, + { + "epoch": 0.77, + "learning_rate": 4.58395751055427e-06, + "loss": 0.6672, + "step": 16980 + }, + { + "epoch": 0.77, + "learning_rate": 4.574878569158836e-06, + "loss": 0.7334, + "step": 16990 + }, + { + "epoch": 0.77, + "learning_rate": 4.565799627763403e-06, + "loss": 0.7033, + "step": 17000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.6286817962337036, + "eval_loss": 0.6592499613761902, + "eval_runtime": 72.898, + "eval_samples_per_second": 56.819, + "eval_steps_per_second": 14.212, + "step": 17000 + }, + { + "epoch": 0.77, + "learning_rate": 4.5567206863679695e-06, + "loss": 0.6502, + "step": 17010 + }, + { + "epoch": 0.77, + "learning_rate": 4.547641744972536e-06, + "loss": 0.7656, + "step": 17020 + }, + { + "epoch": 0.77, + "learning_rate": 4.538562803577104e-06, + "loss": 0.6209, + "step": 17030 + }, + { + "epoch": 0.77, + "learning_rate": 4.52948386218167e-06, + "loss": 0.624, + "step": 17040 + }, + { + "epoch": 0.77, + "learning_rate": 4.520404920786236e-06, + "loss": 0.6344, + "step": 17050 + }, + { + "epoch": 0.77, + "learning_rate": 4.511325979390803e-06, + "loss": 0.6299, + "step": 17060 + }, + { + "epoch": 0.77, + "learning_rate": 4.5022470379953706e-06, + "loss": 0.6219, + "step": 17070 + }, + { + "epoch": 0.78, + "learning_rate": 4.493168096599937e-06, + "loss": 0.6348, + "step": 17080 + }, + { + "epoch": 0.78, + "learning_rate": 4.484089155204503e-06, + "loss": 0.7537, + "step": 17090 + }, + { + "epoch": 0.78, + "learning_rate": 4.47501021380907e-06, + "loss": 0.5959, + "step": 17100 + }, + { + "epoch": 0.78, + "learning_rate": 4.4659312724136365e-06, + "loss": 0.7732, + "step": 17110 + }, + { + "epoch": 0.78, + "learning_rate": 4.456852331018204e-06, + "loss": 0.6707, + "step": 17120 + }, + { + "epoch": 0.78, + "learning_rate": 4.447773389622771e-06, + "loss": 0.6881, + "step": 17130 + }, + { + "epoch": 0.78, + "learning_rate": 4.438694448227337e-06, + "loss": 0.6984, + "step": 17140 + }, + { + "epoch": 0.78, + "learning_rate": 4.429615506831903e-06, + "loss": 0.5365, + "step": 17150 + }, + { + "epoch": 0.78, + "learning_rate": 4.420536565436471e-06, + "loss": 0.6693, + "step": 17160 + }, + { + "epoch": 0.78, + "learning_rate": 4.4114576240410375e-06, + "loss": 0.6051, + "step": 17170 + }, + { + "epoch": 0.78, + "learning_rate": 4.402378682645604e-06, + "loss": 0.6814, + "step": 17180 + }, + { + "epoch": 0.78, + "learning_rate": 4.39329974125017e-06, + "loss": 0.7059, + "step": 17190 + }, + { + "epoch": 0.78, + "learning_rate": 4.384220799854737e-06, + "loss": 0.6785, + "step": 17200 + }, + { + "epoch": 0.78, + "learning_rate": 4.375141858459304e-06, + "loss": 0.5293, + "step": 17210 + }, + { + "epoch": 0.78, + "learning_rate": 4.366062917063871e-06, + "loss": 0.7861, + "step": 17220 + }, + { + "epoch": 0.78, + "learning_rate": 4.356983975668438e-06, + "loss": 0.6998, + "step": 17230 + }, + { + "epoch": 0.78, + "learning_rate": 4.3479050342730035e-06, + "loss": 0.6299, + "step": 17240 + }, + { + "epoch": 0.78, + "learning_rate": 4.338826092877571e-06, + "loss": 0.5969, + "step": 17250 + }, + { + "epoch": 0.78, + "learning_rate": 4.329747151482138e-06, + "loss": 0.727, + "step": 17260 + }, + { + "epoch": 0.78, + "learning_rate": 4.3206682100867045e-06, + "loss": 0.5861, + "step": 17270 + }, + { + "epoch": 0.78, + "learning_rate": 4.311589268691271e-06, + "loss": 0.6994, + "step": 17280 + }, + { + "epoch": 0.78, + "learning_rate": 4.302510327295837e-06, + "loss": 0.6826, + "step": 17290 + }, + { + "epoch": 0.79, + "learning_rate": 4.2934313859004046e-06, + "loss": 0.5617, + "step": 17300 + }, + { + "epoch": 0.79, + "learning_rate": 4.284352444504971e-06, + "loss": 0.6279, + "step": 17310 + }, + { + "epoch": 0.79, + "learning_rate": 4.275273503109538e-06, + "loss": 0.5873, + "step": 17320 + }, + { + "epoch": 0.79, + "learning_rate": 4.266194561714105e-06, + "loss": 0.742, + "step": 17330 + }, + { + "epoch": 0.79, + "learning_rate": 4.257115620318671e-06, + "loss": 0.7314, + "step": 17340 + }, + { + "epoch": 0.79, + "learning_rate": 4.248036678923238e-06, + "loss": 0.551, + "step": 17350 + }, + { + "epoch": 0.79, + "learning_rate": 4.238957737527805e-06, + "loss": 0.7438, + "step": 17360 + }, + { + "epoch": 0.79, + "learning_rate": 4.2298787961323714e-06, + "loss": 0.6326, + "step": 17370 + }, + { + "epoch": 0.79, + "learning_rate": 4.220799854736938e-06, + "loss": 0.6129, + "step": 17380 + }, + { + "epoch": 0.79, + "learning_rate": 4.211720913341505e-06, + "loss": 0.802, + "step": 17390 + }, + { + "epoch": 0.79, + "learning_rate": 4.2026419719460715e-06, + "loss": 0.6312, + "step": 17400 + }, + { + "epoch": 0.79, + "learning_rate": 4.193563030550638e-06, + "loss": 0.6674, + "step": 17410 + }, + { + "epoch": 0.79, + "learning_rate": 4.184484089155205e-06, + "loss": 0.8195, + "step": 17420 + }, + { + "epoch": 0.79, + "learning_rate": 4.175405147759772e-06, + "loss": 0.6348, + "step": 17430 + }, + { + "epoch": 0.79, + "learning_rate": 4.166326206364338e-06, + "loss": 0.6832, + "step": 17440 + }, + { + "epoch": 0.79, + "learning_rate": 4.157247264968905e-06, + "loss": 0.5834, + "step": 17450 + }, + { + "epoch": 0.79, + "learning_rate": 4.148168323573472e-06, + "loss": 0.6127, + "step": 17460 + }, + { + "epoch": 0.79, + "learning_rate": 4.139089382178038e-06, + "loss": 0.6381, + "step": 17470 + }, + { + "epoch": 0.79, + "learning_rate": 4.130010440782605e-06, + "loss": 0.5922, + "step": 17480 + }, + { + "epoch": 0.79, + "learning_rate": 4.120931499387172e-06, + "loss": 0.6482, + "step": 17490 + }, + { + "epoch": 0.79, + "learning_rate": 4.1118525579917385e-06, + "loss": 0.6711, + "step": 17500 + }, + { + "epoch": 0.79, + "eval_accuracy": 0.6301303718010622, + "eval_loss": 0.6614124178886414, + "eval_runtime": 72.5414, + "eval_samples_per_second": 57.098, + "eval_steps_per_second": 14.281, + "step": 17500 + }, + { + "epoch": 0.79, + "learning_rate": 4.102773616596305e-06, + "loss": 0.5473, + "step": 17510 + }, + { + "epoch": 0.8, + "learning_rate": 4.093694675200872e-06, + "loss": 0.6637, + "step": 17520 + }, + { + "epoch": 0.8, + "learning_rate": 4.084615733805439e-06, + "loss": 0.724, + "step": 17530 + }, + { + "epoch": 0.8, + "learning_rate": 4.075536792410005e-06, + "loss": 0.615, + "step": 17540 + }, + { + "epoch": 0.8, + "learning_rate": 4.066457851014572e-06, + "loss": 0.6775, + "step": 17550 + }, + { + "epoch": 0.8, + "learning_rate": 4.057378909619139e-06, + "loss": 0.6363, + "step": 17560 + }, + { + "epoch": 0.8, + "learning_rate": 4.048299968223705e-06, + "loss": 0.7193, + "step": 17570 + }, + { + "epoch": 0.8, + "learning_rate": 4.039221026828272e-06, + "loss": 0.6891, + "step": 17580 + }, + { + "epoch": 0.8, + "learning_rate": 4.030142085432839e-06, + "loss": 0.7309, + "step": 17590 + }, + { + "epoch": 0.8, + "learning_rate": 4.0210631440374055e-06, + "loss": 0.8832, + "step": 17600 + }, + { + "epoch": 0.8, + "learning_rate": 4.011984202641972e-06, + "loss": 0.651, + "step": 17610 + }, + { + "epoch": 0.8, + "learning_rate": 4.002905261246539e-06, + "loss": 0.7375, + "step": 17620 + }, + { + "epoch": 0.8, + "learning_rate": 3.9938263198511055e-06, + "loss": 0.785, + "step": 17630 + }, + { + "epoch": 0.8, + "learning_rate": 3.984747378455672e-06, + "loss": 0.7391, + "step": 17640 + }, + { + "epoch": 0.8, + "learning_rate": 3.975668437060239e-06, + "loss": 0.6408, + "step": 17650 + }, + { + "epoch": 0.8, + "learning_rate": 3.966589495664806e-06, + "loss": 0.5838, + "step": 17660 + }, + { + "epoch": 0.8, + "learning_rate": 3.957510554269372e-06, + "loss": 0.5918, + "step": 17670 + }, + { + "epoch": 0.8, + "learning_rate": 3.948431612873939e-06, + "loss": 0.5525, + "step": 17680 + }, + { + "epoch": 0.8, + "learning_rate": 3.939352671478506e-06, + "loss": 0.7316, + "step": 17690 + }, + { + "epoch": 0.8, + "learning_rate": 3.930273730083073e-06, + "loss": 0.7426, + "step": 17700 + }, + { + "epoch": 0.8, + "learning_rate": 3.921194788687639e-06, + "loss": 0.6811, + "step": 17710 + }, + { + "epoch": 0.8, + "learning_rate": 3.912115847292206e-06, + "loss": 0.6797, + "step": 17720 + }, + { + "epoch": 0.8, + "learning_rate": 3.9030369058967725e-06, + "loss": 0.5119, + "step": 17730 + }, + { + "epoch": 0.81, + "learning_rate": 3.893957964501339e-06, + "loss": 0.6432, + "step": 17740 + }, + { + "epoch": 0.81, + "learning_rate": 3.884879023105907e-06, + "loss": 0.6939, + "step": 17750 + }, + { + "epoch": 0.81, + "learning_rate": 3.875800081710473e-06, + "loss": 0.6508, + "step": 17760 + }, + { + "epoch": 0.81, + "learning_rate": 3.866721140315039e-06, + "loss": 0.5836, + "step": 17770 + }, + { + "epoch": 0.81, + "learning_rate": 3.857642198919606e-06, + "loss": 0.6607, + "step": 17780 + }, + { + "epoch": 0.81, + "learning_rate": 3.8485632575241735e-06, + "loss": 0.7164, + "step": 17790 + }, + { + "epoch": 0.81, + "learning_rate": 3.83948431612874e-06, + "loss": 0.6176, + "step": 17800 + }, + { + "epoch": 0.81, + "learning_rate": 3.830405374733306e-06, + "loss": 0.6244, + "step": 17810 + }, + { + "epoch": 0.81, + "learning_rate": 3.821326433337873e-06, + "loss": 0.6908, + "step": 17820 + }, + { + "epoch": 0.81, + "learning_rate": 3.8122474919424395e-06, + "loss": 0.7539, + "step": 17830 + }, + { + "epoch": 0.81, + "learning_rate": 3.8031685505470066e-06, + "loss": 0.7873, + "step": 17840 + }, + { + "epoch": 0.81, + "learning_rate": 3.7940896091515733e-06, + "loss": 0.6102, + "step": 17850 + }, + { + "epoch": 0.81, + "learning_rate": 3.78501066775614e-06, + "loss": 0.5908, + "step": 17860 + }, + { + "epoch": 0.81, + "learning_rate": 3.7759317263607067e-06, + "loss": 0.5654, + "step": 17870 + }, + { + "epoch": 0.81, + "learning_rate": 3.766852784965274e-06, + "loss": 0.7463, + "step": 17880 + }, + { + "epoch": 0.81, + "learning_rate": 3.75777384356984e-06, + "loss": 0.6754, + "step": 17890 + }, + { + "epoch": 0.81, + "learning_rate": 3.7486949021744068e-06, + "loss": 0.7715, + "step": 17900 + }, + { + "epoch": 0.81, + "learning_rate": 3.7396159607789735e-06, + "loss": 0.6137, + "step": 17910 + }, + { + "epoch": 0.81, + "learning_rate": 3.73053701938354e-06, + "loss": 0.593, + "step": 17920 + }, + { + "epoch": 0.81, + "learning_rate": 3.7214580779881073e-06, + "loss": 0.6748, + "step": 17930 + }, + { + "epoch": 0.81, + "learning_rate": 3.7123791365926735e-06, + "loss": 0.618, + "step": 17940 + }, + { + "epoch": 0.81, + "learning_rate": 3.7033001951972402e-06, + "loss": 0.5645, + "step": 17950 + }, + { + "epoch": 0.82, + "learning_rate": 3.694221253801807e-06, + "loss": 0.684, + "step": 17960 + }, + { + "epoch": 0.82, + "learning_rate": 3.685142312406374e-06, + "loss": 0.634, + "step": 17970 + }, + { + "epoch": 0.82, + "learning_rate": 3.6760633710109408e-06, + "loss": 0.6729, + "step": 17980 + }, + { + "epoch": 0.82, + "learning_rate": 3.666984429615507e-06, + "loss": 0.7057, + "step": 17990 + }, + { + "epoch": 0.82, + "learning_rate": 3.6579054882200737e-06, + "loss": 0.6484, + "step": 18000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.6308546595847416, + "eval_loss": 0.659805417060852, + "eval_runtime": 73.2829, + "eval_samples_per_second": 56.521, + "eval_steps_per_second": 14.137, + "step": 18000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6488265468246404e-06, + "loss": 0.6398, + "step": 18010 + }, + { + "epoch": 0.82, + "learning_rate": 3.6397476054292075e-06, + "loss": 0.5816, + "step": 18020 + }, + { + "epoch": 0.82, + "learning_rate": 3.6306686640337742e-06, + "loss": 0.6373, + "step": 18030 + }, + { + "epoch": 0.82, + "learning_rate": 3.6215897226383405e-06, + "loss": 0.6348, + "step": 18040 + }, + { + "epoch": 0.82, + "learning_rate": 3.612510781242907e-06, + "loss": 0.6752, + "step": 18050 + }, + { + "epoch": 0.82, + "learning_rate": 3.6034318398474743e-06, + "loss": 0.7084, + "step": 18060 + }, + { + "epoch": 0.82, + "learning_rate": 3.594352898452041e-06, + "loss": 0.5984, + "step": 18070 + }, + { + "epoch": 0.82, + "learning_rate": 3.5852739570566077e-06, + "loss": 0.5811, + "step": 18080 + }, + { + "epoch": 0.82, + "learning_rate": 3.576195015661174e-06, + "loss": 0.7691, + "step": 18090 + }, + { + "epoch": 0.82, + "learning_rate": 3.5671160742657407e-06, + "loss": 0.6145, + "step": 18100 + }, + { + "epoch": 0.82, + "learning_rate": 3.558037132870308e-06, + "loss": 0.6488, + "step": 18110 + }, + { + "epoch": 0.82, + "learning_rate": 3.5489581914748745e-06, + "loss": 0.6141, + "step": 18120 + }, + { + "epoch": 0.82, + "learning_rate": 3.539879250079441e-06, + "loss": 0.8035, + "step": 18130 + }, + { + "epoch": 0.82, + "learning_rate": 3.5308003086840075e-06, + "loss": 0.6059, + "step": 18140 + }, + { + "epoch": 0.82, + "learning_rate": 3.521721367288574e-06, + "loss": 0.6791, + "step": 18150 + }, + { + "epoch": 0.82, + "learning_rate": 3.5126424258931413e-06, + "loss": 0.5494, + "step": 18160 + }, + { + "epoch": 0.82, + "learning_rate": 3.503563484497708e-06, + "loss": 0.6541, + "step": 18170 + }, + { + "epoch": 0.83, + "learning_rate": 3.4944845431022747e-06, + "loss": 0.6811, + "step": 18180 + }, + { + "epoch": 0.83, + "learning_rate": 3.485405601706841e-06, + "loss": 0.6953, + "step": 18190 + }, + { + "epoch": 0.83, + "learning_rate": 3.476326660311408e-06, + "loss": 0.7562, + "step": 18200 + }, + { + "epoch": 0.83, + "learning_rate": 3.4672477189159748e-06, + "loss": 0.649, + "step": 18210 + }, + { + "epoch": 0.83, + "learning_rate": 3.4581687775205415e-06, + "loss": 0.692, + "step": 18220 + }, + { + "epoch": 0.83, + "learning_rate": 3.449089836125108e-06, + "loss": 0.633, + "step": 18230 + }, + { + "epoch": 0.83, + "learning_rate": 3.4400108947296744e-06, + "loss": 0.6006, + "step": 18240 + }, + { + "epoch": 0.83, + "learning_rate": 3.4309319533342416e-06, + "loss": 0.526, + "step": 18250 + }, + { + "epoch": 0.83, + "learning_rate": 3.4218530119388082e-06, + "loss": 0.6, + "step": 18260 + }, + { + "epoch": 0.83, + "learning_rate": 3.412774070543375e-06, + "loss": 0.7064, + "step": 18270 + }, + { + "epoch": 0.83, + "learning_rate": 3.4036951291479412e-06, + "loss": 0.7047, + "step": 18280 + }, + { + "epoch": 0.83, + "learning_rate": 3.3946161877525088e-06, + "loss": 0.7266, + "step": 18290 + }, + { + "epoch": 0.83, + "learning_rate": 3.385537246357075e-06, + "loss": 0.6955, + "step": 18300 + }, + { + "epoch": 0.83, + "learning_rate": 3.3764583049616417e-06, + "loss": 0.5686, + "step": 18310 + }, + { + "epoch": 0.83, + "learning_rate": 3.3673793635662084e-06, + "loss": 0.5816, + "step": 18320 + }, + { + "epoch": 0.83, + "learning_rate": 3.3583004221707747e-06, + "loss": 0.6555, + "step": 18330 + }, + { + "epoch": 0.83, + "learning_rate": 3.3492214807753422e-06, + "loss": 0.5975, + "step": 18340 + }, + { + "epoch": 0.83, + "learning_rate": 3.3401425393799085e-06, + "loss": 0.6977, + "step": 18350 + }, + { + "epoch": 0.83, + "learning_rate": 3.331063597984475e-06, + "loss": 0.6643, + "step": 18360 + }, + { + "epoch": 0.83, + "learning_rate": 3.321984656589042e-06, + "loss": 0.7504, + "step": 18370 + }, + { + "epoch": 0.83, + "learning_rate": 3.312905715193609e-06, + "loss": 0.6539, + "step": 18380 + }, + { + "epoch": 0.83, + "learning_rate": 3.3038267737981757e-06, + "loss": 0.7016, + "step": 18390 + }, + { + "epoch": 0.84, + "learning_rate": 3.294747832402742e-06, + "loss": 0.5623, + "step": 18400 + }, + { + "epoch": 0.84, + "learning_rate": 3.2856688910073087e-06, + "loss": 0.5807, + "step": 18410 + }, + { + "epoch": 0.84, + "learning_rate": 3.2765899496118754e-06, + "loss": 0.701, + "step": 18420 + }, + { + "epoch": 0.84, + "learning_rate": 3.2675110082164425e-06, + "loss": 0.6535, + "step": 18430 + }, + { + "epoch": 0.84, + "learning_rate": 3.2584320668210088e-06, + "loss": 0.6252, + "step": 18440 + }, + { + "epoch": 0.84, + "learning_rate": 3.2493531254255755e-06, + "loss": 0.576, + "step": 18450 + }, + { + "epoch": 0.84, + "learning_rate": 3.240274184030142e-06, + "loss": 0.7225, + "step": 18460 + }, + { + "epoch": 0.84, + "learning_rate": 3.2311952426347093e-06, + "loss": 0.574, + "step": 18470 + }, + { + "epoch": 0.84, + "learning_rate": 3.222116301239276e-06, + "loss": 0.6803, + "step": 18480 + }, + { + "epoch": 0.84, + "learning_rate": 3.2130373598438423e-06, + "loss": 0.6945, + "step": 18490 + }, + { + "epoch": 0.84, + "learning_rate": 3.203958418448409e-06, + "loss": 0.5729, + "step": 18500 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.6269917914051183, + "eval_loss": 0.6618302464485168, + "eval_runtime": 74.0627, + "eval_samples_per_second": 55.926, + "eval_steps_per_second": 13.988, + "step": 18500 + }, + { + "epoch": 0.84, + "learning_rate": 3.1948794770529757e-06, + "loss": 0.6121, + "step": 18510 + }, + { + "epoch": 0.84, + "learning_rate": 3.1858005356575428e-06, + "loss": 0.632, + "step": 18520 + }, + { + "epoch": 0.84, + "learning_rate": 3.1767215942621095e-06, + "loss": 0.5984, + "step": 18530 + }, + { + "epoch": 0.84, + "learning_rate": 3.1676426528666757e-06, + "loss": 0.8053, + "step": 18540 + }, + { + "epoch": 0.84, + "learning_rate": 3.1585637114712424e-06, + "loss": 0.7311, + "step": 18550 + }, + { + "epoch": 0.84, + "learning_rate": 3.1494847700758096e-06, + "loss": 0.6496, + "step": 18560 + }, + { + "epoch": 0.84, + "learning_rate": 3.1404058286803763e-06, + "loss": 0.7105, + "step": 18570 + }, + { + "epoch": 0.84, + "learning_rate": 3.131326887284943e-06, + "loss": 0.6768, + "step": 18580 + }, + { + "epoch": 0.84, + "learning_rate": 3.1222479458895092e-06, + "loss": 0.7305, + "step": 18590 + }, + { + "epoch": 0.84, + "learning_rate": 3.113169004494076e-06, + "loss": 0.8035, + "step": 18600 + }, + { + "epoch": 0.84, + "learning_rate": 3.104090063098643e-06, + "loss": 0.6086, + "step": 18610 + }, + { + "epoch": 0.85, + "learning_rate": 3.0950111217032097e-06, + "loss": 0.6119, + "step": 18620 + }, + { + "epoch": 0.85, + "learning_rate": 3.0859321803077764e-06, + "loss": 0.7191, + "step": 18630 + }, + { + "epoch": 0.85, + "learning_rate": 3.0768532389123427e-06, + "loss": 0.648, + "step": 18640 + }, + { + "epoch": 0.85, + "learning_rate": 3.06777429751691e-06, + "loss": 0.7826, + "step": 18650 + }, + { + "epoch": 0.85, + "learning_rate": 3.0586953561214765e-06, + "loss": 0.7137, + "step": 18660 + }, + { + "epoch": 0.85, + "learning_rate": 3.0496164147260432e-06, + "loss": 0.8223, + "step": 18670 + }, + { + "epoch": 0.85, + "learning_rate": 3.04053747333061e-06, + "loss": 0.7203, + "step": 18680 + }, + { + "epoch": 0.85, + "learning_rate": 3.031458531935176e-06, + "loss": 0.6809, + "step": 18690 + }, + { + "epoch": 0.85, + "learning_rate": 3.0223795905397433e-06, + "loss": 0.6203, + "step": 18700 + }, + { + "epoch": 0.85, + "learning_rate": 3.01330064914431e-06, + "loss": 0.6621, + "step": 18710 + }, + { + "epoch": 0.85, + "learning_rate": 3.0042217077488767e-06, + "loss": 0.6363, + "step": 18720 + }, + { + "epoch": 0.85, + "learning_rate": 2.9951427663534434e-06, + "loss": 0.6408, + "step": 18730 + }, + { + "epoch": 0.85, + "learning_rate": 2.9860638249580105e-06, + "loss": 0.5842, + "step": 18740 + }, + { + "epoch": 0.85, + "learning_rate": 2.9769848835625768e-06, + "loss": 0.7553, + "step": 18750 + }, + { + "epoch": 0.85, + "learning_rate": 2.9679059421671435e-06, + "loss": 0.7262, + "step": 18760 + }, + { + "epoch": 0.85, + "learning_rate": 2.95882700077171e-06, + "loss": 0.6549, + "step": 18770 + }, + { + "epoch": 0.85, + "learning_rate": 2.949748059376277e-06, + "loss": 0.7242, + "step": 18780 + }, + { + "epoch": 0.85, + "learning_rate": 2.940669117980844e-06, + "loss": 0.625, + "step": 18790 + }, + { + "epoch": 0.85, + "learning_rate": 2.9315901765854103e-06, + "loss": 0.657, + "step": 18800 + }, + { + "epoch": 0.85, + "learning_rate": 2.922511235189977e-06, + "loss": 0.7918, + "step": 18810 + }, + { + "epoch": 0.85, + "learning_rate": 2.9134322937945437e-06, + "loss": 0.6266, + "step": 18820 + }, + { + "epoch": 0.85, + "learning_rate": 2.9043533523991108e-06, + "loss": 0.6428, + "step": 18830 + }, + { + "epoch": 0.86, + "learning_rate": 2.8952744110036775e-06, + "loss": 0.7258, + "step": 18840 + }, + { + "epoch": 0.86, + "learning_rate": 2.8861954696082437e-06, + "loss": 0.6338, + "step": 18850 + }, + { + "epoch": 0.86, + "learning_rate": 2.8771165282128104e-06, + "loss": 0.5566, + "step": 18860 + }, + { + "epoch": 0.86, + "learning_rate": 2.868037586817377e-06, + "loss": 0.6768, + "step": 18870 + }, + { + "epoch": 0.86, + "learning_rate": 2.8589586454219443e-06, + "loss": 0.7463, + "step": 18880 + }, + { + "epoch": 0.86, + "learning_rate": 2.849879704026511e-06, + "loss": 0.7348, + "step": 18890 + }, + { + "epoch": 0.86, + "learning_rate": 2.8408007626310772e-06, + "loss": 0.741, + "step": 18900 + }, + { + "epoch": 0.86, + "learning_rate": 2.831721821235644e-06, + "loss": 0.7045, + "step": 18910 + }, + { + "epoch": 0.86, + "learning_rate": 2.822642879840211e-06, + "loss": 0.6104, + "step": 18920 + }, + { + "epoch": 0.86, + "learning_rate": 2.8135639384447777e-06, + "loss": 0.5941, + "step": 18930 + }, + { + "epoch": 0.86, + "learning_rate": 2.8044849970493444e-06, + "loss": 0.7625, + "step": 18940 + }, + { + "epoch": 0.86, + "learning_rate": 2.7954060556539107e-06, + "loss": 0.6734, + "step": 18950 + }, + { + "epoch": 0.86, + "learning_rate": 2.7863271142584774e-06, + "loss": 0.7344, + "step": 18960 + }, + { + "epoch": 0.86, + "learning_rate": 2.7772481728630445e-06, + "loss": 0.6715, + "step": 18970 + }, + { + "epoch": 0.86, + "learning_rate": 2.7681692314676112e-06, + "loss": 0.7699, + "step": 18980 + }, + { + "epoch": 0.86, + "learning_rate": 2.759090290072178e-06, + "loss": 0.6113, + "step": 18990 + }, + { + "epoch": 0.86, + "learning_rate": 2.750011348676744e-06, + "loss": 0.634, + "step": 19000 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.629406084017383, + "eval_loss": 0.6582936644554138, + "eval_runtime": 74.1488, + "eval_samples_per_second": 55.861, + "eval_steps_per_second": 13.972, + "step": 19000 + }, + { + "epoch": 0.86, + "learning_rate": 2.7409324072813113e-06, + "loss": 0.5986, + "step": 19010 + }, + { + "epoch": 0.86, + "learning_rate": 2.731853465885878e-06, + "loss": 0.7604, + "step": 19020 + }, + { + "epoch": 0.86, + "learning_rate": 2.7227745244904447e-06, + "loss": 0.6496, + "step": 19030 + }, + { + "epoch": 0.86, + "learning_rate": 2.7136955830950114e-06, + "loss": 0.6102, + "step": 19040 + }, + { + "epoch": 0.86, + "learning_rate": 2.7046166416995777e-06, + "loss": 0.7336, + "step": 19050 + }, + { + "epoch": 0.87, + "learning_rate": 2.6955377003041448e-06, + "loss": 0.7105, + "step": 19060 + }, + { + "epoch": 0.87, + "learning_rate": 2.6864587589087115e-06, + "loss": 0.6516, + "step": 19070 + }, + { + "epoch": 0.87, + "learning_rate": 2.677379817513278e-06, + "loss": 0.6199, + "step": 19080 + }, + { + "epoch": 0.87, + "learning_rate": 2.668300876117845e-06, + "loss": 0.6559, + "step": 19090 + }, + { + "epoch": 0.87, + "learning_rate": 2.659221934722412e-06, + "loss": 0.6232, + "step": 19100 + }, + { + "epoch": 0.87, + "learning_rate": 2.6501429933269783e-06, + "loss": 0.634, + "step": 19110 + }, + { + "epoch": 0.87, + "learning_rate": 2.641064051931545e-06, + "loss": 0.6158, + "step": 19120 + }, + { + "epoch": 0.87, + "learning_rate": 2.6319851105361117e-06, + "loss": 0.685, + "step": 19130 + }, + { + "epoch": 0.87, + "learning_rate": 2.6229061691406784e-06, + "loss": 0.6271, + "step": 19140 + }, + { + "epoch": 0.87, + "learning_rate": 2.6138272277452455e-06, + "loss": 0.6379, + "step": 19150 + }, + { + "epoch": 0.87, + "learning_rate": 2.6047482863498117e-06, + "loss": 0.6584, + "step": 19160 + }, + { + "epoch": 0.87, + "learning_rate": 2.5956693449543784e-06, + "loss": 0.6168, + "step": 19170 + }, + { + "epoch": 0.87, + "learning_rate": 2.586590403558945e-06, + "loss": 0.5928, + "step": 19180 + }, + { + "epoch": 0.87, + "learning_rate": 2.577511462163512e-06, + "loss": 0.5943, + "step": 19190 + }, + { + "epoch": 0.87, + "learning_rate": 2.568432520768079e-06, + "loss": 0.7664, + "step": 19200 + }, + { + "epoch": 0.87, + "learning_rate": 2.5593535793726452e-06, + "loss": 0.698, + "step": 19210 + }, + { + "epoch": 0.87, + "learning_rate": 2.550274637977212e-06, + "loss": 0.6602, + "step": 19220 + }, + { + "epoch": 0.87, + "learning_rate": 2.5411956965817786e-06, + "loss": 0.6725, + "step": 19230 + }, + { + "epoch": 0.87, + "learning_rate": 2.5321167551863457e-06, + "loss": 0.6062, + "step": 19240 + }, + { + "epoch": 0.87, + "learning_rate": 2.5230378137909124e-06, + "loss": 0.6545, + "step": 19250 + }, + { + "epoch": 0.87, + "learning_rate": 2.5139588723954787e-06, + "loss": 0.5836, + "step": 19260 + }, + { + "epoch": 0.87, + "learning_rate": 2.5048799310000454e-06, + "loss": 0.6107, + "step": 19270 + }, + { + "epoch": 0.88, + "learning_rate": 2.495800989604612e-06, + "loss": 0.6293, + "step": 19280 + }, + { + "epoch": 0.88, + "learning_rate": 2.486722048209179e-06, + "loss": 0.6992, + "step": 19290 + }, + { + "epoch": 0.88, + "learning_rate": 2.477643106813746e-06, + "loss": 0.5889, + "step": 19300 + }, + { + "epoch": 0.88, + "learning_rate": 2.468564165418312e-06, + "loss": 0.6504, + "step": 19310 + }, + { + "epoch": 0.88, + "learning_rate": 2.4594852240228793e-06, + "loss": 0.6891, + "step": 19320 + }, + { + "epoch": 0.88, + "learning_rate": 2.4504062826274456e-06, + "loss": 0.6273, + "step": 19330 + }, + { + "epoch": 0.88, + "learning_rate": 2.4413273412320127e-06, + "loss": 0.6385, + "step": 19340 + }, + { + "epoch": 0.88, + "learning_rate": 2.4322483998365794e-06, + "loss": 0.6826, + "step": 19350 + }, + { + "epoch": 0.88, + "learning_rate": 2.423169458441146e-06, + "loss": 0.6477, + "step": 19360 + }, + { + "epoch": 0.88, + "learning_rate": 2.414090517045713e-06, + "loss": 0.6295, + "step": 19370 + }, + { + "epoch": 0.88, + "learning_rate": 2.405011575650279e-06, + "loss": 0.6473, + "step": 19380 + }, + { + "epoch": 0.88, + "learning_rate": 2.395932634254846e-06, + "loss": 0.7258, + "step": 19390 + }, + { + "epoch": 0.88, + "learning_rate": 2.386853692859413e-06, + "loss": 0.7238, + "step": 19400 + }, + { + "epoch": 0.88, + "learning_rate": 2.3777747514639796e-06, + "loss": 0.602, + "step": 19410 + }, + { + "epoch": 0.88, + "learning_rate": 2.3686958100685463e-06, + "loss": 0.6506, + "step": 19420 + }, + { + "epoch": 0.88, + "learning_rate": 2.359616868673113e-06, + "loss": 0.657, + "step": 19430 + }, + { + "epoch": 0.88, + "learning_rate": 2.3505379272776797e-06, + "loss": 0.5479, + "step": 19440 + }, + { + "epoch": 0.88, + "learning_rate": 2.3414589858822464e-06, + "loss": 0.7652, + "step": 19450 + }, + { + "epoch": 0.88, + "learning_rate": 2.332380044486813e-06, + "loss": 0.6119, + "step": 19460 + }, + { + "epoch": 0.88, + "learning_rate": 2.3233011030913798e-06, + "loss": 0.625, + "step": 19470 + }, + { + "epoch": 0.88, + "learning_rate": 2.3142221616959464e-06, + "loss": 0.634, + "step": 19480 + }, + { + "epoch": 0.88, + "learning_rate": 2.305143220300513e-06, + "loss": 0.6096, + "step": 19490 + }, + { + "epoch": 0.89, + "learning_rate": 2.29606427890508e-06, + "loss": 0.6139, + "step": 19500 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.6296475132786093, + "eval_loss": 0.6594942212104797, + "eval_runtime": 73.3398, + "eval_samples_per_second": 56.477, + "eval_steps_per_second": 14.126, + "step": 19500 + }, + { + "epoch": 0.89, + "learning_rate": 2.2869853375096465e-06, + "loss": 0.6381, + "step": 19510 + }, + { + "epoch": 0.89, + "learning_rate": 2.2779063961142132e-06, + "loss": 0.6336, + "step": 19520 + }, + { + "epoch": 0.89, + "learning_rate": 2.26882745471878e-06, + "loss": 0.7096, + "step": 19530 + }, + { + "epoch": 0.89, + "learning_rate": 2.2597485133233466e-06, + "loss": 0.6396, + "step": 19540 + }, + { + "epoch": 0.89, + "learning_rate": 2.2506695719279133e-06, + "loss": 0.6719, + "step": 19550 + }, + { + "epoch": 0.89, + "learning_rate": 2.24159063053248e-06, + "loss": 0.6197, + "step": 19560 + }, + { + "epoch": 0.89, + "learning_rate": 2.2325116891370467e-06, + "loss": 0.6291, + "step": 19570 + }, + { + "epoch": 0.89, + "learning_rate": 2.2234327477416134e-06, + "loss": 0.6397, + "step": 19580 + }, + { + "epoch": 0.89, + "learning_rate": 2.21435380634618e-06, + "loss": 0.7871, + "step": 19590 + }, + { + "epoch": 0.89, + "learning_rate": 2.205274864950747e-06, + "loss": 0.7424, + "step": 19600 + }, + { + "epoch": 0.89, + "learning_rate": 2.196195923555314e-06, + "loss": 0.7363, + "step": 19610 + }, + { + "epoch": 0.89, + "learning_rate": 2.18711698215988e-06, + "loss": 0.7027, + "step": 19620 + }, + { + "epoch": 0.89, + "learning_rate": 2.178038040764447e-06, + "loss": 0.627, + "step": 19630 + }, + { + "epoch": 0.89, + "learning_rate": 2.1689590993690136e-06, + "loss": 0.7043, + "step": 19640 + }, + { + "epoch": 0.89, + "learning_rate": 2.1598801579735803e-06, + "loss": 0.5365, + "step": 19650 + }, + { + "epoch": 0.89, + "learning_rate": 2.1508012165781474e-06, + "loss": 0.607, + "step": 19660 + }, + { + "epoch": 0.89, + "learning_rate": 2.1417222751827137e-06, + "loss": 0.6062, + "step": 19670 + }, + { + "epoch": 0.89, + "learning_rate": 2.132643333787281e-06, + "loss": 0.7021, + "step": 19680 + }, + { + "epoch": 0.89, + "learning_rate": 2.123564392391847e-06, + "loss": 0.5836, + "step": 19690 + }, + { + "epoch": 0.89, + "learning_rate": 2.114485450996414e-06, + "loss": 0.6492, + "step": 19700 + }, + { + "epoch": 0.89, + "learning_rate": 2.105406509600981e-06, + "loss": 0.5834, + "step": 19710 + }, + { + "epoch": 0.9, + "learning_rate": 2.096327568205547e-06, + "loss": 0.5445, + "step": 19720 + }, + { + "epoch": 0.9, + "learning_rate": 2.0872486268101143e-06, + "loss": 0.5725, + "step": 19730 + }, + { + "epoch": 0.9, + "learning_rate": 2.0781696854146805e-06, + "loss": 0.6119, + "step": 19740 + }, + { + "epoch": 0.9, + "learning_rate": 2.0690907440192477e-06, + "loss": 0.5551, + "step": 19750 + }, + { + "epoch": 0.9, + "learning_rate": 2.0600118026238144e-06, + "loss": 0.6842, + "step": 19760 + }, + { + "epoch": 0.9, + "learning_rate": 2.050932861228381e-06, + "loss": 0.6477, + "step": 19770 + }, + { + "epoch": 0.9, + "learning_rate": 2.0418539198329478e-06, + "loss": 0.6268, + "step": 19780 + }, + { + "epoch": 0.9, + "learning_rate": 2.0327749784375144e-06, + "loss": 0.6814, + "step": 19790 + }, + { + "epoch": 0.9, + "learning_rate": 2.023696037042081e-06, + "loss": 0.6207, + "step": 19800 + }, + { + "epoch": 0.9, + "learning_rate": 2.014617095646648e-06, + "loss": 0.5721, + "step": 19810 + }, + { + "epoch": 0.9, + "learning_rate": 2.0055381542512145e-06, + "loss": 0.6545, + "step": 19820 + }, + { + "epoch": 0.9, + "learning_rate": 1.9964592128557812e-06, + "loss": 0.7377, + "step": 19830 + }, + { + "epoch": 0.9, + "learning_rate": 1.987380271460348e-06, + "loss": 0.6426, + "step": 19840 + }, + { + "epoch": 0.9, + "learning_rate": 1.9783013300649146e-06, + "loss": 0.6916, + "step": 19850 + }, + { + "epoch": 0.9, + "learning_rate": 1.9692223886694813e-06, + "loss": 0.609, + "step": 19860 + }, + { + "epoch": 0.9, + "learning_rate": 1.960143447274048e-06, + "loss": 0.7535, + "step": 19870 + }, + { + "epoch": 0.9, + "learning_rate": 1.9510645058786147e-06, + "loss": 0.6238, + "step": 19880 + }, + { + "epoch": 0.9, + "learning_rate": 1.9419855644831814e-06, + "loss": 0.7176, + "step": 19890 + }, + { + "epoch": 0.9, + "learning_rate": 1.932906623087748e-06, + "loss": 0.6641, + "step": 19900 + }, + { + "epoch": 0.9, + "learning_rate": 1.923827681692315e-06, + "loss": 0.5863, + "step": 19910 + }, + { + "epoch": 0.9, + "learning_rate": 1.9147487402968815e-06, + "loss": 0.6531, + "step": 19920 + }, + { + "epoch": 0.9, + "learning_rate": 1.9056697989014484e-06, + "loss": 0.6604, + "step": 19930 + }, + { + "epoch": 0.91, + "learning_rate": 1.8965908575060149e-06, + "loss": 0.6605, + "step": 19940 + }, + { + "epoch": 0.91, + "learning_rate": 1.8875119161105818e-06, + "loss": 0.7225, + "step": 19950 + }, + { + "epoch": 0.91, + "learning_rate": 1.8784329747151483e-06, + "loss": 0.7346, + "step": 19960 + }, + { + "epoch": 0.91, + "learning_rate": 1.8693540333197152e-06, + "loss": 0.6707, + "step": 19970 + }, + { + "epoch": 0.91, + "learning_rate": 1.8602750919242819e-06, + "loss": 0.6119, + "step": 19980 + }, + { + "epoch": 0.91, + "learning_rate": 1.8511961505288484e-06, + "loss": 0.6988, + "step": 19990 + }, + { + "epoch": 0.91, + "learning_rate": 1.8421172091334153e-06, + "loss": 0.6289, + "step": 20000 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.6286817962337036, + "eval_loss": 0.6617236733436584, + "eval_runtime": 74.5822, + "eval_samples_per_second": 55.536, + "eval_steps_per_second": 13.891, + "step": 20000 + }, + { + "epoch": 0.91, + "learning_rate": 1.8330382677379818e-06, + "loss": 0.6711, + "step": 20010 + }, + { + "epoch": 0.91, + "learning_rate": 1.8239593263425487e-06, + "loss": 0.5879, + "step": 20020 + }, + { + "epoch": 0.91, + "learning_rate": 1.8148803849471154e-06, + "loss": 0.7063, + "step": 20030 + }, + { + "epoch": 0.91, + "learning_rate": 1.805801443551682e-06, + "loss": 0.5883, + "step": 20040 + }, + { + "epoch": 0.91, + "learning_rate": 1.7967225021562488e-06, + "loss": 0.5684, + "step": 20050 + }, + { + "epoch": 0.91, + "learning_rate": 1.7876435607608152e-06, + "loss": 0.6596, + "step": 20060 + }, + { + "epoch": 0.91, + "learning_rate": 1.7785646193653822e-06, + "loss": 0.5432, + "step": 20070 + }, + { + "epoch": 0.91, + "learning_rate": 1.7694856779699488e-06, + "loss": 0.5748, + "step": 20080 + }, + { + "epoch": 0.91, + "learning_rate": 1.7604067365745155e-06, + "loss": 0.7727, + "step": 20090 + }, + { + "epoch": 0.91, + "learning_rate": 1.7513277951790822e-06, + "loss": 0.5312, + "step": 20100 + }, + { + "epoch": 0.91, + "learning_rate": 1.7422488537836491e-06, + "loss": 0.717, + "step": 20110 + }, + { + "epoch": 0.91, + "learning_rate": 1.7331699123882156e-06, + "loss": 0.6273, + "step": 20120 + }, + { + "epoch": 0.91, + "learning_rate": 1.7240909709927825e-06, + "loss": 0.627, + "step": 20130 + }, + { + "epoch": 0.91, + "learning_rate": 1.715012029597349e-06, + "loss": 0.8084, + "step": 20140 + }, + { + "epoch": 0.91, + "learning_rate": 1.7059330882019157e-06, + "loss": 0.557, + "step": 20150 + }, + { + "epoch": 0.92, + "learning_rate": 1.6968541468064826e-06, + "loss": 0.75, + "step": 20160 + }, + { + "epoch": 0.92, + "learning_rate": 1.6877752054110491e-06, + "loss": 0.7277, + "step": 20170 + }, + { + "epoch": 0.92, + "learning_rate": 1.678696264015616e-06, + "loss": 0.6887, + "step": 20180 + }, + { + "epoch": 0.92, + "learning_rate": 1.6696173226201825e-06, + "loss": 0.5959, + "step": 20190 + }, + { + "epoch": 0.92, + "learning_rate": 1.6605383812247494e-06, + "loss": 0.6359, + "step": 20200 + }, + { + "epoch": 0.92, + "learning_rate": 1.651459439829316e-06, + "loss": 0.5998, + "step": 20210 + }, + { + "epoch": 0.92, + "learning_rate": 1.6423804984338828e-06, + "loss": 0.585, + "step": 20220 + }, + { + "epoch": 0.92, + "learning_rate": 1.6333015570384495e-06, + "loss": 0.6037, + "step": 20230 + }, + { + "epoch": 0.92, + "learning_rate": 1.624222615643016e-06, + "loss": 0.7137, + "step": 20240 + }, + { + "epoch": 0.92, + "learning_rate": 1.615143674247583e-06, + "loss": 0.7148, + "step": 20250 + }, + { + "epoch": 0.92, + "learning_rate": 1.6060647328521494e-06, + "loss": 0.6832, + "step": 20260 + }, + { + "epoch": 0.92, + "learning_rate": 1.5969857914567163e-06, + "loss": 0.6246, + "step": 20270 + }, + { + "epoch": 0.92, + "learning_rate": 1.587906850061283e-06, + "loss": 0.6691, + "step": 20280 + }, + { + "epoch": 0.92, + "learning_rate": 1.5788279086658497e-06, + "loss": 0.6387, + "step": 20290 + }, + { + "epoch": 0.92, + "learning_rate": 1.5697489672704164e-06, + "loss": 0.6922, + "step": 20300 + }, + { + "epoch": 0.92, + "learning_rate": 1.5606700258749833e-06, + "loss": 0.6711, + "step": 20310 + }, + { + "epoch": 0.92, + "learning_rate": 1.5515910844795498e-06, + "loss": 0.693, + "step": 20320 + }, + { + "epoch": 0.92, + "learning_rate": 1.5425121430841165e-06, + "loss": 0.618, + "step": 20330 + }, + { + "epoch": 0.92, + "learning_rate": 1.5334332016886832e-06, + "loss": 0.6441, + "step": 20340 + }, + { + "epoch": 0.92, + "learning_rate": 1.5243542602932499e-06, + "loss": 0.6623, + "step": 20350 + }, + { + "epoch": 0.92, + "learning_rate": 1.5152753188978168e-06, + "loss": 0.6813, + "step": 20360 + }, + { + "epoch": 0.92, + "learning_rate": 1.5061963775023832e-06, + "loss": 0.6551, + "step": 20370 + }, + { + "epoch": 0.93, + "learning_rate": 1.4971174361069502e-06, + "loss": 0.6084, + "step": 20380 + }, + { + "epoch": 0.93, + "learning_rate": 1.4880384947115166e-06, + "loss": 0.6887, + "step": 20390 + }, + { + "epoch": 0.93, + "learning_rate": 1.4789595533160835e-06, + "loss": 0.7918, + "step": 20400 + }, + { + "epoch": 0.93, + "learning_rate": 1.4698806119206502e-06, + "loss": 0.626, + "step": 20410 + }, + { + "epoch": 0.93, + "learning_rate": 1.4608016705252167e-06, + "loss": 0.6773, + "step": 20420 + }, + { + "epoch": 0.93, + "learning_rate": 1.4517227291297836e-06, + "loss": 0.6996, + "step": 20430 + }, + { + "epoch": 0.93, + "learning_rate": 1.4426437877343501e-06, + "loss": 0.6697, + "step": 20440 + }, + { + "epoch": 0.93, + "learning_rate": 1.433564846338917e-06, + "loss": 0.5686, + "step": 20450 + }, + { + "epoch": 0.93, + "learning_rate": 1.4244859049434837e-06, + "loss": 0.6312, + "step": 20460 + }, + { + "epoch": 0.93, + "learning_rate": 1.4154069635480504e-06, + "loss": 0.6873, + "step": 20470 + }, + { + "epoch": 0.93, + "learning_rate": 1.4063280221526171e-06, + "loss": 0.7268, + "step": 20480 + }, + { + "epoch": 0.93, + "learning_rate": 1.397249080757184e-06, + "loss": 0.6863, + "step": 20490 + }, + { + "epoch": 0.93, + "learning_rate": 1.3881701393617505e-06, + "loss": 0.5889, + "step": 20500 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.6301303718010622, + "eval_loss": 0.6595224738121033, + "eval_runtime": 73.6905, + "eval_samples_per_second": 56.208, + "eval_steps_per_second": 14.059, + "step": 20500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3790911979663172e-06, + "loss": 0.5518, + "step": 20510 + }, + { + "epoch": 0.93, + "learning_rate": 1.370012256570884e-06, + "loss": 0.7367, + "step": 20520 + }, + { + "epoch": 0.93, + "learning_rate": 1.3609333151754506e-06, + "loss": 0.5818, + "step": 20530 + }, + { + "epoch": 0.93, + "learning_rate": 1.3518543737800175e-06, + "loss": 0.7182, + "step": 20540 + }, + { + "epoch": 0.93, + "learning_rate": 1.342775432384584e-06, + "loss": 0.5711, + "step": 20550 + }, + { + "epoch": 0.93, + "learning_rate": 1.333696490989151e-06, + "loss": 0.6314, + "step": 20560 + }, + { + "epoch": 0.93, + "learning_rate": 1.3246175495937174e-06, + "loss": 0.6828, + "step": 20570 + }, + { + "epoch": 0.93, + "learning_rate": 1.315538608198284e-06, + "loss": 0.6562, + "step": 20580 + }, + { + "epoch": 0.93, + "learning_rate": 1.306459666802851e-06, + "loss": 0.8039, + "step": 20590 + }, + { + "epoch": 0.94, + "learning_rate": 1.2973807254074175e-06, + "loss": 0.6371, + "step": 20600 + }, + { + "epoch": 0.94, + "learning_rate": 1.2883017840119844e-06, + "loss": 0.6227, + "step": 20610 + }, + { + "epoch": 0.94, + "learning_rate": 1.2792228426165509e-06, + "loss": 0.6477, + "step": 20620 + }, + { + "epoch": 0.94, + "learning_rate": 1.2701439012211178e-06, + "loss": 0.7176, + "step": 20630 + }, + { + "epoch": 0.94, + "learning_rate": 1.2610649598256845e-06, + "loss": 0.6961, + "step": 20640 + }, + { + "epoch": 0.94, + "learning_rate": 1.2519860184302512e-06, + "loss": 0.5751, + "step": 20650 + }, + { + "epoch": 0.94, + "learning_rate": 1.2429070770348179e-06, + "loss": 0.6572, + "step": 20660 + }, + { + "epoch": 0.94, + "learning_rate": 1.2338281356393846e-06, + "loss": 0.6223, + "step": 20670 + }, + { + "epoch": 0.94, + "learning_rate": 1.2247491942439513e-06, + "loss": 0.768, + "step": 20680 + }, + { + "epoch": 0.94, + "learning_rate": 1.215670252848518e-06, + "loss": 0.5842, + "step": 20690 + }, + { + "epoch": 0.94, + "learning_rate": 1.2065913114530846e-06, + "loss": 0.6072, + "step": 20700 + }, + { + "epoch": 0.94, + "learning_rate": 1.1975123700576513e-06, + "loss": 0.6664, + "step": 20710 + }, + { + "epoch": 0.94, + "learning_rate": 1.188433428662218e-06, + "loss": 0.6254, + "step": 20720 + }, + { + "epoch": 0.94, + "learning_rate": 1.1793544872667847e-06, + "loss": 0.6463, + "step": 20730 + }, + { + "epoch": 0.94, + "learning_rate": 1.1702755458713514e-06, + "loss": 0.7381, + "step": 20740 + }, + { + "epoch": 0.94, + "learning_rate": 1.1611966044759181e-06, + "loss": 0.6648, + "step": 20750 + }, + { + "epoch": 0.94, + "learning_rate": 1.1521176630804848e-06, + "loss": 0.7143, + "step": 20760 + }, + { + "epoch": 0.94, + "learning_rate": 1.1430387216850517e-06, + "loss": 0.6377, + "step": 20770 + }, + { + "epoch": 0.94, + "learning_rate": 1.1339597802896184e-06, + "loss": 0.6785, + "step": 20780 + }, + { + "epoch": 0.94, + "learning_rate": 1.1248808388941851e-06, + "loss": 0.7199, + "step": 20790 + }, + { + "epoch": 0.94, + "learning_rate": 1.1158018974987516e-06, + "loss": 0.6695, + "step": 20800 + }, + { + "epoch": 0.94, + "learning_rate": 1.1067229561033183e-06, + "loss": 0.6596, + "step": 20810 + }, + { + "epoch": 0.95, + "learning_rate": 1.0976440147078852e-06, + "loss": 0.7838, + "step": 20820 + }, + { + "epoch": 0.95, + "learning_rate": 1.088565073312452e-06, + "loss": 0.6062, + "step": 20830 + }, + { + "epoch": 0.95, + "learning_rate": 1.0794861319170186e-06, + "loss": 0.6529, + "step": 20840 + }, + { + "epoch": 0.95, + "learning_rate": 1.0704071905215853e-06, + "loss": 0.7447, + "step": 20850 + }, + { + "epoch": 0.95, + "learning_rate": 1.061328249126152e-06, + "loss": 0.7277, + "step": 20860 + }, + { + "epoch": 0.95, + "learning_rate": 1.0522493077307187e-06, + "loss": 0.6977, + "step": 20870 + }, + { + "epoch": 0.95, + "learning_rate": 1.0431703663352854e-06, + "loss": 0.5682, + "step": 20880 + }, + { + "epoch": 0.95, + "learning_rate": 1.034091424939852e-06, + "loss": 0.6207, + "step": 20890 + }, + { + "epoch": 0.95, + "learning_rate": 1.0250124835444188e-06, + "loss": 0.6986, + "step": 20900 + }, + { + "epoch": 0.95, + "learning_rate": 1.0159335421489855e-06, + "loss": 0.6393, + "step": 20910 + }, + { + "epoch": 0.95, + "learning_rate": 1.0068546007535522e-06, + "loss": 0.609, + "step": 20920 + }, + { + "epoch": 0.95, + "learning_rate": 9.977756593581189e-07, + "loss": 0.727, + "step": 20930 + }, + { + "epoch": 0.95, + "learning_rate": 9.886967179626856e-07, + "loss": 0.6385, + "step": 20940 + }, + { + "epoch": 0.95, + "learning_rate": 9.796177765672525e-07, + "loss": 0.7049, + "step": 20950 + }, + { + "epoch": 0.95, + "learning_rate": 9.705388351718192e-07, + "loss": 0.6059, + "step": 20960 + }, + { + "epoch": 0.95, + "learning_rate": 9.614598937763857e-07, + "loss": 0.6453, + "step": 20970 + }, + { + "epoch": 0.95, + "learning_rate": 9.523809523809525e-07, + "loss": 0.61, + "step": 20980 + }, + { + "epoch": 0.95, + "learning_rate": 9.433020109855191e-07, + "loss": 0.7059, + "step": 20990 + }, + { + "epoch": 0.95, + "learning_rate": 9.342230695900858e-07, + "loss": 0.6539, + "step": 21000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.6279575084500242, + "eval_loss": 0.6598912477493286, + "eval_runtime": 73.9974, + "eval_samples_per_second": 55.975, + "eval_steps_per_second": 14.0, + "step": 21000 + }, + { + "epoch": 0.95, + "learning_rate": 9.251441281946525e-07, + "loss": 0.7365, + "step": 21010 + }, + { + "epoch": 0.95, + "learning_rate": 9.160651867992193e-07, + "loss": 0.7266, + "step": 21020 + }, + { + "epoch": 0.95, + "learning_rate": 9.06986245403786e-07, + "loss": 0.7639, + "step": 21030 + }, + { + "epoch": 0.96, + "learning_rate": 8.979073040083527e-07, + "loss": 0.6879, + "step": 21040 + }, + { + "epoch": 0.96, + "learning_rate": 8.888283626129194e-07, + "loss": 0.7623, + "step": 21050 + }, + { + "epoch": 0.96, + "learning_rate": 8.79749421217486e-07, + "loss": 0.6229, + "step": 21060 + }, + { + "epoch": 0.96, + "learning_rate": 8.706704798220528e-07, + "loss": 0.5939, + "step": 21070 + }, + { + "epoch": 0.96, + "learning_rate": 8.615915384266195e-07, + "loss": 0.6016, + "step": 21080 + }, + { + "epoch": 0.96, + "learning_rate": 8.525125970311862e-07, + "loss": 0.5709, + "step": 21090 + }, + { + "epoch": 0.96, + "learning_rate": 8.434336556357529e-07, + "loss": 0.6391, + "step": 21100 + }, + { + "epoch": 0.96, + "learning_rate": 8.343547142403197e-07, + "loss": 0.5955, + "step": 21110 + }, + { + "epoch": 0.96, + "learning_rate": 8.252757728448864e-07, + "loss": 0.6453, + "step": 21120 + }, + { + "epoch": 0.96, + "learning_rate": 8.161968314494531e-07, + "loss": 0.5959, + "step": 21130 + }, + { + "epoch": 0.96, + "learning_rate": 8.071178900540198e-07, + "loss": 0.7219, + "step": 21140 + }, + { + "epoch": 0.96, + "learning_rate": 7.980389486585864e-07, + "loss": 0.7617, + "step": 21150 + }, + { + "epoch": 0.96, + "learning_rate": 7.889600072631531e-07, + "loss": 0.6307, + "step": 21160 + }, + { + "epoch": 0.96, + "learning_rate": 7.798810658677199e-07, + "loss": 0.6355, + "step": 21170 + }, + { + "epoch": 0.96, + "learning_rate": 7.708021244722866e-07, + "loss": 0.6818, + "step": 21180 + }, + { + "epoch": 0.96, + "learning_rate": 7.617231830768533e-07, + "loss": 0.7445, + "step": 21190 + }, + { + "epoch": 0.96, + "learning_rate": 7.5264424168142e-07, + "loss": 0.6539, + "step": 21200 + }, + { + "epoch": 0.96, + "learning_rate": 7.435653002859868e-07, + "loss": 0.6465, + "step": 21210 + }, + { + "epoch": 0.96, + "learning_rate": 7.344863588905535e-07, + "loss": 0.6133, + "step": 21220 + }, + { + "epoch": 0.96, + "learning_rate": 7.254074174951202e-07, + "loss": 0.6699, + "step": 21230 + }, + { + "epoch": 0.96, + "learning_rate": 7.163284760996868e-07, + "loss": 0.6191, + "step": 21240 + }, + { + "epoch": 0.96, + "learning_rate": 7.072495347042535e-07, + "loss": 0.5492, + "step": 21250 + }, + { + "epoch": 0.97, + "learning_rate": 6.981705933088203e-07, + "loss": 0.8207, + "step": 21260 + }, + { + "epoch": 0.97, + "learning_rate": 6.89091651913387e-07, + "loss": 0.6326, + "step": 21270 + }, + { + "epoch": 0.97, + "learning_rate": 6.800127105179537e-07, + "loss": 0.7082, + "step": 21280 + }, + { + "epoch": 0.97, + "learning_rate": 6.709337691225204e-07, + "loss": 0.7359, + "step": 21290 + }, + { + "epoch": 0.97, + "learning_rate": 6.618548277270872e-07, + "loss": 0.7242, + "step": 21300 + }, + { + "epoch": 0.97, + "learning_rate": 6.527758863316538e-07, + "loss": 0.7369, + "step": 21310 + }, + { + "epoch": 0.97, + "learning_rate": 6.436969449362204e-07, + "loss": 0.6687, + "step": 21320 + }, + { + "epoch": 0.97, + "learning_rate": 6.346180035407871e-07, + "loss": 0.7807, + "step": 21330 + }, + { + "epoch": 0.97, + "learning_rate": 6.255390621453538e-07, + "loss": 0.6871, + "step": 21340 + }, + { + "epoch": 0.97, + "learning_rate": 6.164601207499206e-07, + "loss": 0.6143, + "step": 21350 + }, + { + "epoch": 0.97, + "learning_rate": 6.073811793544873e-07, + "loss": 0.6752, + "step": 21360 + }, + { + "epoch": 0.97, + "learning_rate": 5.98302237959054e-07, + "loss": 0.6838, + "step": 21370 + }, + { + "epoch": 0.97, + "learning_rate": 5.892232965636207e-07, + "loss": 0.5486, + "step": 21380 + }, + { + "epoch": 0.97, + "learning_rate": 5.801443551681874e-07, + "loss": 0.6605, + "step": 21390 + }, + { + "epoch": 0.97, + "learning_rate": 5.710654137727541e-07, + "loss": 0.6533, + "step": 21400 + }, + { + "epoch": 0.97, + "learning_rate": 5.619864723773208e-07, + "loss": 0.616, + "step": 21410 + }, + { + "epoch": 0.97, + "learning_rate": 5.529075309818876e-07, + "loss": 0.6836, + "step": 21420 + }, + { + "epoch": 0.97, + "learning_rate": 5.438285895864542e-07, + "loss": 0.6934, + "step": 21430 + }, + { + "epoch": 0.97, + "learning_rate": 5.34749648191021e-07, + "loss": 0.768, + "step": 21440 + }, + { + "epoch": 0.97, + "learning_rate": 5.256707067955877e-07, + "loss": 0.6928, + "step": 21450 + }, + { + "epoch": 0.97, + "learning_rate": 5.165917654001544e-07, + "loss": 0.6156, + "step": 21460 + }, + { + "epoch": 0.97, + "learning_rate": 5.075128240047211e-07, + "loss": 0.6945, + "step": 21470 + }, + { + "epoch": 0.98, + "learning_rate": 4.984338826092878e-07, + "loss": 0.6664, + "step": 21480 + }, + { + "epoch": 0.98, + "learning_rate": 4.893549412138545e-07, + "loss": 0.6572, + "step": 21490 + }, + { + "epoch": 0.98, + "learning_rate": 4.802759998184212e-07, + "loss": 0.6396, + "step": 21500 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.6253017865765331, + "eval_loss": 0.6602552533149719, + "eval_runtime": 73.7722, + "eval_samples_per_second": 56.146, + "eval_steps_per_second": 14.043, + "step": 21500 + }, + { + "epoch": 0.98, + "learning_rate": 4.7119705842298793e-07, + "loss": 0.6551, + "step": 21510 + }, + { + "epoch": 0.98, + "learning_rate": 4.6211811702755463e-07, + "loss": 0.6627, + "step": 21520 + }, + { + "epoch": 0.98, + "learning_rate": 4.530391756321213e-07, + "loss": 0.7145, + "step": 21530 + }, + { + "epoch": 0.98, + "learning_rate": 4.4396023423668807e-07, + "loss": 0.6893, + "step": 21540 + }, + { + "epoch": 0.98, + "learning_rate": 4.3488129284125477e-07, + "loss": 0.6969, + "step": 21550 + }, + { + "epoch": 0.98, + "learning_rate": 4.258023514458214e-07, + "loss": 0.7125, + "step": 21560 + }, + { + "epoch": 0.98, + "learning_rate": 4.1672341005038816e-07, + "loss": 0.6242, + "step": 21570 + }, + { + "epoch": 0.98, + "learning_rate": 4.0764446865495486e-07, + "loss": 0.635, + "step": 21580 + }, + { + "epoch": 0.98, + "learning_rate": 3.985655272595216e-07, + "loss": 0.7063, + "step": 21590 + }, + { + "epoch": 0.98, + "learning_rate": 3.8948658586408825e-07, + "loss": 0.775, + "step": 21600 + }, + { + "epoch": 0.98, + "learning_rate": 3.8040764446865495e-07, + "loss": 0.5451, + "step": 21610 + }, + { + "epoch": 0.98, + "learning_rate": 3.713287030732217e-07, + "loss": 0.5568, + "step": 21620 + }, + { + "epoch": 0.98, + "learning_rate": 3.622497616777884e-07, + "loss": 0.7299, + "step": 21630 + }, + { + "epoch": 0.98, + "learning_rate": 3.5317082028235514e-07, + "loss": 0.6406, + "step": 21640 + }, + { + "epoch": 0.98, + "learning_rate": 3.440918788869218e-07, + "loss": 0.6998, + "step": 21650 + }, + { + "epoch": 0.98, + "learning_rate": 3.3501293749148853e-07, + "loss": 0.6822, + "step": 21660 + }, + { + "epoch": 0.98, + "learning_rate": 3.2593399609605523e-07, + "loss": 0.6109, + "step": 21670 + }, + { + "epoch": 0.98, + "learning_rate": 3.16855054700622e-07, + "loss": 0.6658, + "step": 21680 + }, + { + "epoch": 0.98, + "learning_rate": 3.077761133051886e-07, + "loss": 0.6207, + "step": 21690 + }, + { + "epoch": 0.99, + "learning_rate": 2.986971719097553e-07, + "loss": 0.7873, + "step": 21700 + }, + { + "epoch": 0.99, + "learning_rate": 2.8961823051432207e-07, + "loss": 0.6695, + "step": 21710 + }, + { + "epoch": 0.99, + "learning_rate": 2.8053928911888876e-07, + "loss": 0.6246, + "step": 21720 + }, + { + "epoch": 0.99, + "learning_rate": 2.7146034772345546e-07, + "loss": 0.6021, + "step": 21730 + }, + { + "epoch": 0.99, + "learning_rate": 2.623814063280222e-07, + "loss": 0.6416, + "step": 21740 + }, + { + "epoch": 0.99, + "learning_rate": 2.533024649325889e-07, + "loss": 0.6463, + "step": 21750 + }, + { + "epoch": 0.99, + "learning_rate": 2.442235235371556e-07, + "loss": 0.6547, + "step": 21760 + }, + { + "epoch": 0.99, + "learning_rate": 2.351445821417223e-07, + "loss": 0.6244, + "step": 21770 + }, + { + "epoch": 0.99, + "learning_rate": 2.2606564074628902e-07, + "loss": 0.6291, + "step": 21780 + }, + { + "epoch": 0.99, + "learning_rate": 2.1698669935085571e-07, + "loss": 0.7139, + "step": 21790 + }, + { + "epoch": 0.99, + "learning_rate": 2.0790775795542244e-07, + "loss": 0.6533, + "step": 21800 + }, + { + "epoch": 0.99, + "learning_rate": 1.988288165599891e-07, + "loss": 0.7258, + "step": 21810 + }, + { + "epoch": 0.99, + "learning_rate": 1.8974987516455583e-07, + "loss": 0.6953, + "step": 21820 + }, + { + "epoch": 0.99, + "learning_rate": 1.8067093376912252e-07, + "loss": 0.6939, + "step": 21830 + }, + { + "epoch": 0.99, + "learning_rate": 1.7159199237368925e-07, + "loss": 0.7047, + "step": 21840 + }, + { + "epoch": 0.99, + "learning_rate": 1.6251305097825594e-07, + "loss": 0.6329, + "step": 21850 + }, + { + "epoch": 0.99, + "learning_rate": 1.5343410958282267e-07, + "loss": 0.6727, + "step": 21860 + }, + { + "epoch": 0.99, + "learning_rate": 1.4435516818738936e-07, + "loss": 0.7387, + "step": 21870 + }, + { + "epoch": 0.99, + "learning_rate": 1.3527622679195606e-07, + "loss": 0.7191, + "step": 21880 + }, + { + "epoch": 0.99, + "learning_rate": 1.2619728539652278e-07, + "loss": 0.7266, + "step": 21890 + }, + { + "epoch": 0.99, + "learning_rate": 1.1711834400108948e-07, + "loss": 0.6119, + "step": 21900 + }, + { + "epoch": 0.99, + "learning_rate": 1.0803940260565619e-07, + "loss": 0.6201, + "step": 21910 + }, + { + "epoch": 1.0, + "learning_rate": 9.896046121022288e-08, + "loss": 0.7092, + "step": 21920 + }, + { + "epoch": 1.0, + "learning_rate": 8.98815198147896e-08, + "loss": 0.6348, + "step": 21930 + }, + { + "epoch": 1.0, + "learning_rate": 8.080257841935631e-08, + "loss": 0.7164, + "step": 21940 + }, + { + "epoch": 1.0, + "learning_rate": 7.172363702392301e-08, + "loss": 0.6273, + "step": 21950 + }, + { + "epoch": 1.0, + "learning_rate": 6.264469562848973e-08, + "loss": 0.6154, + "step": 21960 + }, + { + "epoch": 1.0, + "learning_rate": 5.3565754233056436e-08, + "loss": 0.7488, + "step": 21970 + }, + { + "epoch": 1.0, + "learning_rate": 4.448681283762314e-08, + "loss": 0.6797, + "step": 21980 + }, + { + "epoch": 1.0, + "learning_rate": 3.540787144218984e-08, + "loss": 0.6977, + "step": 21990 + }, + { + "epoch": 1.0, + "learning_rate": 2.6328930046756547e-08, + "loss": 0.632, + "step": 22000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.6269917914051183, + "eval_loss": 0.6599072813987732, + "eval_runtime": 73.7894, + "eval_samples_per_second": 56.133, + "eval_steps_per_second": 14.04, + "step": 22000 + } + ], + "max_steps": 22029, + "num_train_epochs": 1, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +}