{ "best_metric": 0.8367530107498169, "best_model_checkpoint": "albert-base-v2-Malicious_URLs/checkpoint-51087", "epoch": 1.0, "global_step": 51087, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9999608510971483e-05, "loss": 1.1706, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.9980425548574e-05, "loss": 1.1046, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.9960851097148004e-05, "loss": 0.9971, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.9941276645722004e-05, "loss": 1.0489, "step": 150 }, { "epoch": 0.0, "learning_rate": 1.9921702194296007e-05, "loss": 0.9931, "step": 200 }, { "epoch": 0.0, "learning_rate": 1.9902127742870007e-05, "loss": 1.0318, "step": 250 }, { "epoch": 0.01, "learning_rate": 1.988255329144401e-05, "loss": 1.015, "step": 300 }, { "epoch": 0.01, "learning_rate": 1.986297884001801e-05, "loss": 0.8838, "step": 350 }, { "epoch": 0.01, "learning_rate": 1.9843404388592012e-05, "loss": 0.9699, "step": 400 }, { "epoch": 0.01, "learning_rate": 1.9823829937166012e-05, "loss": 0.9734, "step": 450 }, { "epoch": 0.01, "learning_rate": 1.9804255485740015e-05, "loss": 0.9944, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.9784681034314015e-05, "loss": 0.9173, "step": 550 }, { "epoch": 0.01, "learning_rate": 1.9765106582888018e-05, "loss": 0.8308, "step": 600 }, { "epoch": 0.01, "learning_rate": 1.9745532131462017e-05, "loss": 0.8602, "step": 650 }, { "epoch": 0.01, "learning_rate": 1.9725957680036017e-05, "loss": 0.9536, "step": 700 }, { "epoch": 0.01, "learning_rate": 1.970638322861002e-05, "loss": 0.9808, "step": 750 }, { "epoch": 0.02, "learning_rate": 1.968680877718402e-05, "loss": 0.884, "step": 800 }, { "epoch": 0.02, "learning_rate": 1.9667234325758023e-05, "loss": 0.9394, "step": 850 }, { "epoch": 0.02, "learning_rate": 1.9647659874332022e-05, "loss": 0.9618, "step": 900 }, { "epoch": 0.02, "learning_rate": 1.9628085422906025e-05, "loss": 0.9904, "step": 950 }, { "epoch": 0.02, "learning_rate": 1.9608510971480025e-05, "loss": 0.9654, "step": 1000 }, { "epoch": 0.02, "learning_rate": 1.9588936520054028e-05, "loss": 0.8642, "step": 1050 }, { "epoch": 0.02, "learning_rate": 1.9569362068628028e-05, "loss": 0.949, "step": 1100 }, { "epoch": 0.02, "learning_rate": 1.954978761720203e-05, "loss": 0.8394, "step": 1150 }, { "epoch": 0.02, "learning_rate": 1.953021316577603e-05, "loss": 0.8666, "step": 1200 }, { "epoch": 0.02, "learning_rate": 1.9510638714350033e-05, "loss": 0.9656, "step": 1250 }, { "epoch": 0.03, "learning_rate": 1.9491064262924033e-05, "loss": 0.9162, "step": 1300 }, { "epoch": 0.03, "learning_rate": 1.9471489811498033e-05, "loss": 0.9125, "step": 1350 }, { "epoch": 0.03, "learning_rate": 1.9451915360072036e-05, "loss": 0.872, "step": 1400 }, { "epoch": 0.03, "learning_rate": 1.9432340908646035e-05, "loss": 0.9858, "step": 1450 }, { "epoch": 0.03, "learning_rate": 1.941276645722004e-05, "loss": 0.9059, "step": 1500 }, { "epoch": 0.03, "learning_rate": 1.9393192005794038e-05, "loss": 0.9647, "step": 1550 }, { "epoch": 0.03, "learning_rate": 1.937361755436804e-05, "loss": 0.9802, "step": 1600 }, { "epoch": 0.03, "learning_rate": 1.935404310294204e-05, "loss": 0.9604, "step": 1650 }, { "epoch": 0.03, "learning_rate": 1.9334468651516044e-05, "loss": 0.8774, "step": 1700 }, { "epoch": 0.03, "learning_rate": 1.9314894200090043e-05, "loss": 0.8881, "step": 1750 }, { "epoch": 0.04, "learning_rate": 1.9295319748664047e-05, "loss": 0.8775, "step": 1800 }, { "epoch": 0.04, "learning_rate": 1.9275745297238046e-05, "loss": 0.9427, "step": 1850 }, { "epoch": 0.04, "learning_rate": 1.925617084581205e-05, "loss": 0.8997, "step": 1900 }, { "epoch": 0.04, "learning_rate": 1.923659639438605e-05, "loss": 0.9101, "step": 1950 }, { "epoch": 0.04, "learning_rate": 1.9217021942960052e-05, "loss": 0.9247, "step": 2000 }, { "epoch": 0.04, "learning_rate": 1.919744749153405e-05, "loss": 0.8978, "step": 2050 }, { "epoch": 0.04, "learning_rate": 1.917787304010805e-05, "loss": 0.9436, "step": 2100 }, { "epoch": 0.04, "learning_rate": 1.9158298588682054e-05, "loss": 0.91, "step": 2150 }, { "epoch": 0.04, "learning_rate": 1.9138724137256054e-05, "loss": 0.965, "step": 2200 }, { "epoch": 0.04, "learning_rate": 1.9119149685830057e-05, "loss": 0.9488, "step": 2250 }, { "epoch": 0.05, "learning_rate": 1.9099575234404057e-05, "loss": 0.8534, "step": 2300 }, { "epoch": 0.05, "learning_rate": 1.908000078297806e-05, "loss": 0.9636, "step": 2350 }, { "epoch": 0.05, "learning_rate": 1.906042633155206e-05, "loss": 0.9581, "step": 2400 }, { "epoch": 0.05, "learning_rate": 1.9040851880126062e-05, "loss": 0.83, "step": 2450 }, { "epoch": 0.05, "learning_rate": 1.9021277428700062e-05, "loss": 0.9326, "step": 2500 }, { "epoch": 0.05, "learning_rate": 1.9001702977274065e-05, "loss": 0.9152, "step": 2550 }, { "epoch": 0.05, "learning_rate": 1.8982128525848065e-05, "loss": 0.9363, "step": 2600 }, { "epoch": 0.05, "learning_rate": 1.8962554074422068e-05, "loss": 0.8746, "step": 2650 }, { "epoch": 0.05, "learning_rate": 1.8942979622996067e-05, "loss": 0.8996, "step": 2700 }, { "epoch": 0.05, "learning_rate": 1.8923405171570067e-05, "loss": 0.9116, "step": 2750 }, { "epoch": 0.05, "learning_rate": 1.890383072014407e-05, "loss": 0.8015, "step": 2800 }, { "epoch": 0.06, "learning_rate": 1.888425626871807e-05, "loss": 0.926, "step": 2850 }, { "epoch": 0.06, "learning_rate": 1.8864681817292073e-05, "loss": 0.8506, "step": 2900 }, { "epoch": 0.06, "learning_rate": 1.8845107365866072e-05, "loss": 0.9913, "step": 2950 }, { "epoch": 0.06, "learning_rate": 1.8825532914440075e-05, "loss": 0.8355, "step": 3000 }, { "epoch": 0.06, "learning_rate": 1.8805958463014075e-05, "loss": 0.9649, "step": 3050 }, { "epoch": 0.06, "learning_rate": 1.8786384011588078e-05, "loss": 0.8837, "step": 3100 }, { "epoch": 0.06, "learning_rate": 1.8766809560162078e-05, "loss": 0.9546, "step": 3150 }, { "epoch": 0.06, "learning_rate": 1.874723510873608e-05, "loss": 0.8529, "step": 3200 }, { "epoch": 0.06, "learning_rate": 1.872766065731008e-05, "loss": 0.9503, "step": 3250 }, { "epoch": 0.06, "learning_rate": 1.8708086205884083e-05, "loss": 1.0059, "step": 3300 }, { "epoch": 0.07, "learning_rate": 1.8688511754458083e-05, "loss": 0.9493, "step": 3350 }, { "epoch": 0.07, "learning_rate": 1.8668937303032083e-05, "loss": 0.8838, "step": 3400 }, { "epoch": 0.07, "learning_rate": 1.8649362851606086e-05, "loss": 0.8728, "step": 3450 }, { "epoch": 0.07, "learning_rate": 1.8629788400180085e-05, "loss": 0.8237, "step": 3500 }, { "epoch": 0.07, "learning_rate": 1.861021394875409e-05, "loss": 0.9691, "step": 3550 }, { "epoch": 0.07, "learning_rate": 1.8590639497328088e-05, "loss": 0.8934, "step": 3600 }, { "epoch": 0.07, "learning_rate": 1.857106504590209e-05, "loss": 0.8628, "step": 3650 }, { "epoch": 0.07, "learning_rate": 1.855149059447609e-05, "loss": 0.9047, "step": 3700 }, { "epoch": 0.07, "learning_rate": 1.8531916143050094e-05, "loss": 0.9896, "step": 3750 }, { "epoch": 0.07, "learning_rate": 1.8512341691624093e-05, "loss": 0.8885, "step": 3800 }, { "epoch": 0.08, "learning_rate": 1.8492767240198096e-05, "loss": 0.9171, "step": 3850 }, { "epoch": 0.08, "learning_rate": 1.8473192788772096e-05, "loss": 0.9144, "step": 3900 }, { "epoch": 0.08, "learning_rate": 1.84536183373461e-05, "loss": 0.949, "step": 3950 }, { "epoch": 0.08, "learning_rate": 1.84340438859201e-05, "loss": 0.9595, "step": 4000 }, { "epoch": 0.08, "learning_rate": 1.84144694344941e-05, "loss": 0.9018, "step": 4050 }, { "epoch": 0.08, "learning_rate": 1.83948949830681e-05, "loss": 0.9434, "step": 4100 }, { "epoch": 0.08, "learning_rate": 1.83753205316421e-05, "loss": 1.0054, "step": 4150 }, { "epoch": 0.08, "learning_rate": 1.8355746080216104e-05, "loss": 0.8584, "step": 4200 }, { "epoch": 0.08, "learning_rate": 1.8336171628790104e-05, "loss": 0.8759, "step": 4250 }, { "epoch": 0.08, "learning_rate": 1.8316597177364107e-05, "loss": 0.7978, "step": 4300 }, { "epoch": 0.09, "learning_rate": 1.8297022725938106e-05, "loss": 0.8494, "step": 4350 }, { "epoch": 0.09, "learning_rate": 1.827744827451211e-05, "loss": 0.8714, "step": 4400 }, { "epoch": 0.09, "learning_rate": 1.825787382308611e-05, "loss": 0.9271, "step": 4450 }, { "epoch": 0.09, "learning_rate": 1.8238299371660112e-05, "loss": 0.9466, "step": 4500 }, { "epoch": 0.09, "learning_rate": 1.8218724920234112e-05, "loss": 0.8289, "step": 4550 }, { "epoch": 0.09, "learning_rate": 1.8199150468808115e-05, "loss": 0.9159, "step": 4600 }, { "epoch": 0.09, "learning_rate": 1.8179576017382114e-05, "loss": 0.8418, "step": 4650 }, { "epoch": 0.09, "learning_rate": 1.8160001565956114e-05, "loss": 0.9818, "step": 4700 }, { "epoch": 0.09, "learning_rate": 1.8140427114530117e-05, "loss": 0.8762, "step": 4750 }, { "epoch": 0.09, "learning_rate": 1.8120852663104117e-05, "loss": 0.9628, "step": 4800 }, { "epoch": 0.09, "learning_rate": 1.810127821167812e-05, "loss": 0.7778, "step": 4850 }, { "epoch": 0.1, "learning_rate": 1.808170376025212e-05, "loss": 0.8857, "step": 4900 }, { "epoch": 0.1, "learning_rate": 1.8062129308826123e-05, "loss": 0.9288, "step": 4950 }, { "epoch": 0.1, "learning_rate": 1.8042554857400122e-05, "loss": 0.9101, "step": 5000 }, { "epoch": 0.1, "learning_rate": 1.8022980405974125e-05, "loss": 0.9358, "step": 5050 }, { "epoch": 0.1, "learning_rate": 1.8003405954548125e-05, "loss": 0.8956, "step": 5100 }, { "epoch": 0.1, "learning_rate": 1.7983831503122128e-05, "loss": 0.8506, "step": 5150 }, { "epoch": 0.1, "learning_rate": 1.7964257051696128e-05, "loss": 0.8635, "step": 5200 }, { "epoch": 0.1, "learning_rate": 1.794468260027013e-05, "loss": 0.9267, "step": 5250 }, { "epoch": 0.1, "learning_rate": 1.792510814884413e-05, "loss": 0.8688, "step": 5300 }, { "epoch": 0.1, "learning_rate": 1.790553369741813e-05, "loss": 0.8538, "step": 5350 }, { "epoch": 0.11, "learning_rate": 1.7885959245992133e-05, "loss": 0.8628, "step": 5400 }, { "epoch": 0.11, "learning_rate": 1.7866384794566133e-05, "loss": 0.9528, "step": 5450 }, { "epoch": 0.11, "learning_rate": 1.7846810343140136e-05, "loss": 0.9198, "step": 5500 }, { "epoch": 0.11, "learning_rate": 1.7827235891714135e-05, "loss": 0.8878, "step": 5550 }, { "epoch": 0.11, "learning_rate": 1.7807661440288138e-05, "loss": 0.9425, "step": 5600 }, { "epoch": 0.11, "learning_rate": 1.7788086988862138e-05, "loss": 0.8977, "step": 5650 }, { "epoch": 0.11, "learning_rate": 1.776851253743614e-05, "loss": 0.8967, "step": 5700 }, { "epoch": 0.11, "learning_rate": 1.774893808601014e-05, "loss": 0.9365, "step": 5750 }, { "epoch": 0.11, "learning_rate": 1.7729363634584144e-05, "loss": 0.8376, "step": 5800 }, { "epoch": 0.11, "learning_rate": 1.7709789183158143e-05, "loss": 0.8587, "step": 5850 }, { "epoch": 0.12, "learning_rate": 1.7690214731732146e-05, "loss": 0.8808, "step": 5900 }, { "epoch": 0.12, "learning_rate": 1.7670640280306146e-05, "loss": 0.8871, "step": 5950 }, { "epoch": 0.12, "learning_rate": 1.765106582888015e-05, "loss": 0.8399, "step": 6000 }, { "epoch": 0.12, "learning_rate": 1.763149137745415e-05, "loss": 0.9007, "step": 6050 }, { "epoch": 0.12, "learning_rate": 1.7611916926028148e-05, "loss": 0.9102, "step": 6100 }, { "epoch": 0.12, "learning_rate": 1.759234247460215e-05, "loss": 0.8689, "step": 6150 }, { "epoch": 0.12, "learning_rate": 1.757276802317615e-05, "loss": 0.9715, "step": 6200 }, { "epoch": 0.12, "learning_rate": 1.7553193571750154e-05, "loss": 0.893, "step": 6250 }, { "epoch": 0.12, "learning_rate": 1.7533619120324154e-05, "loss": 0.8597, "step": 6300 }, { "epoch": 0.12, "learning_rate": 1.7514044668898157e-05, "loss": 0.9172, "step": 6350 }, { "epoch": 0.13, "learning_rate": 1.7494470217472156e-05, "loss": 0.9287, "step": 6400 }, { "epoch": 0.13, "learning_rate": 1.747489576604616e-05, "loss": 0.8687, "step": 6450 }, { "epoch": 0.13, "learning_rate": 1.745532131462016e-05, "loss": 0.8804, "step": 6500 }, { "epoch": 0.13, "learning_rate": 1.7435746863194162e-05, "loss": 0.9483, "step": 6550 }, { "epoch": 0.13, "learning_rate": 1.741617241176816e-05, "loss": 0.8749, "step": 6600 }, { "epoch": 0.13, "learning_rate": 1.7396597960342165e-05, "loss": 0.9452, "step": 6650 }, { "epoch": 0.13, "learning_rate": 1.7377023508916164e-05, "loss": 0.9059, "step": 6700 }, { "epoch": 0.13, "learning_rate": 1.7357449057490164e-05, "loss": 0.8311, "step": 6750 }, { "epoch": 0.13, "learning_rate": 1.7337874606064167e-05, "loss": 0.8671, "step": 6800 }, { "epoch": 0.13, "learning_rate": 1.7318300154638167e-05, "loss": 0.9169, "step": 6850 }, { "epoch": 0.14, "learning_rate": 1.729872570321217e-05, "loss": 0.9448, "step": 6900 }, { "epoch": 0.14, "learning_rate": 1.727915125178617e-05, "loss": 0.8703, "step": 6950 }, { "epoch": 0.14, "learning_rate": 1.7259576800360172e-05, "loss": 0.9438, "step": 7000 }, { "epoch": 0.14, "learning_rate": 1.7240002348934172e-05, "loss": 0.868, "step": 7050 }, { "epoch": 0.14, "learning_rate": 1.7220427897508175e-05, "loss": 0.9868, "step": 7100 }, { "epoch": 0.14, "learning_rate": 1.7200853446082175e-05, "loss": 0.7904, "step": 7150 }, { "epoch": 0.14, "learning_rate": 1.7181278994656178e-05, "loss": 0.8628, "step": 7200 }, { "epoch": 0.14, "learning_rate": 1.7161704543230177e-05, "loss": 0.8898, "step": 7250 }, { "epoch": 0.14, "learning_rate": 1.714213009180418e-05, "loss": 0.9962, "step": 7300 }, { "epoch": 0.14, "learning_rate": 1.712255564037818e-05, "loss": 0.939, "step": 7350 }, { "epoch": 0.14, "learning_rate": 1.710298118895218e-05, "loss": 0.7759, "step": 7400 }, { "epoch": 0.15, "learning_rate": 1.7083406737526183e-05, "loss": 0.8616, "step": 7450 }, { "epoch": 0.15, "learning_rate": 1.7063832286100182e-05, "loss": 0.9495, "step": 7500 }, { "epoch": 0.15, "learning_rate": 1.7044257834674185e-05, "loss": 0.8752, "step": 7550 }, { "epoch": 0.15, "learning_rate": 1.7024683383248185e-05, "loss": 0.8908, "step": 7600 }, { "epoch": 0.15, "learning_rate": 1.7005108931822188e-05, "loss": 0.8843, "step": 7650 }, { "epoch": 0.15, "learning_rate": 1.6985534480396188e-05, "loss": 0.9643, "step": 7700 }, { "epoch": 0.15, "learning_rate": 1.696596002897019e-05, "loss": 0.8528, "step": 7750 }, { "epoch": 0.15, "learning_rate": 1.694638557754419e-05, "loss": 0.8625, "step": 7800 }, { "epoch": 0.15, "learning_rate": 1.6926811126118193e-05, "loss": 0.9945, "step": 7850 }, { "epoch": 0.15, "learning_rate": 1.6907236674692193e-05, "loss": 0.8112, "step": 7900 }, { "epoch": 0.16, "learning_rate": 1.6887662223266196e-05, "loss": 0.9049, "step": 7950 }, { "epoch": 0.16, "learning_rate": 1.6868087771840196e-05, "loss": 0.8941, "step": 8000 }, { "epoch": 0.16, "learning_rate": 1.6848513320414195e-05, "loss": 0.8887, "step": 8050 }, { "epoch": 0.16, "learning_rate": 1.68289388689882e-05, "loss": 0.941, "step": 8100 }, { "epoch": 0.16, "learning_rate": 1.6809364417562198e-05, "loss": 0.8806, "step": 8150 }, { "epoch": 0.16, "learning_rate": 1.67897899661362e-05, "loss": 0.9605, "step": 8200 }, { "epoch": 0.16, "learning_rate": 1.67702155147102e-05, "loss": 0.8432, "step": 8250 }, { "epoch": 0.16, "learning_rate": 1.6750641063284204e-05, "loss": 0.8381, "step": 8300 }, { "epoch": 0.16, "learning_rate": 1.6731066611858204e-05, "loss": 0.8558, "step": 8350 }, { "epoch": 0.16, "learning_rate": 1.6711492160432207e-05, "loss": 0.9525, "step": 8400 }, { "epoch": 0.17, "learning_rate": 1.6691917709006206e-05, "loss": 0.8599, "step": 8450 }, { "epoch": 0.17, "learning_rate": 1.667234325758021e-05, "loss": 0.9754, "step": 8500 }, { "epoch": 0.17, "learning_rate": 1.665276880615421e-05, "loss": 0.9006, "step": 8550 }, { "epoch": 0.17, "learning_rate": 1.6633194354728212e-05, "loss": 0.8845, "step": 8600 }, { "epoch": 0.17, "learning_rate": 1.661361990330221e-05, "loss": 1.0112, "step": 8650 }, { "epoch": 0.17, "learning_rate": 1.659404545187621e-05, "loss": 1.0344, "step": 8700 }, { "epoch": 0.17, "learning_rate": 1.6574471000450214e-05, "loss": 0.9505, "step": 8750 }, { "epoch": 0.17, "learning_rate": 1.6554896549024214e-05, "loss": 0.8472, "step": 8800 }, { "epoch": 0.17, "learning_rate": 1.6535322097598217e-05, "loss": 0.923, "step": 8850 }, { "epoch": 0.17, "learning_rate": 1.6515747646172217e-05, "loss": 0.9265, "step": 8900 }, { "epoch": 0.18, "learning_rate": 1.649617319474622e-05, "loss": 0.9428, "step": 8950 }, { "epoch": 0.18, "learning_rate": 1.647659874332022e-05, "loss": 0.961, "step": 9000 }, { "epoch": 0.18, "learning_rate": 1.6457024291894222e-05, "loss": 0.8903, "step": 9050 }, { "epoch": 0.18, "learning_rate": 1.6437449840468222e-05, "loss": 0.9757, "step": 9100 }, { "epoch": 0.18, "learning_rate": 1.6417875389042225e-05, "loss": 0.9208, "step": 9150 }, { "epoch": 0.18, "learning_rate": 1.6398300937616225e-05, "loss": 0.9723, "step": 9200 }, { "epoch": 0.18, "learning_rate": 1.6378726486190228e-05, "loss": 0.845, "step": 9250 }, { "epoch": 0.18, "learning_rate": 1.6359152034764227e-05, "loss": 0.9029, "step": 9300 }, { "epoch": 0.18, "learning_rate": 1.633957758333823e-05, "loss": 0.8009, "step": 9350 }, { "epoch": 0.18, "learning_rate": 1.632000313191223e-05, "loss": 0.9144, "step": 9400 }, { "epoch": 0.18, "learning_rate": 1.630042868048623e-05, "loss": 0.9191, "step": 9450 }, { "epoch": 0.19, "learning_rate": 1.6280854229060233e-05, "loss": 0.9451, "step": 9500 }, { "epoch": 0.19, "learning_rate": 1.6261279777634232e-05, "loss": 0.9365, "step": 9550 }, { "epoch": 0.19, "learning_rate": 1.6241705326208235e-05, "loss": 0.8914, "step": 9600 }, { "epoch": 0.19, "learning_rate": 1.6222130874782235e-05, "loss": 0.849, "step": 9650 }, { "epoch": 0.19, "learning_rate": 1.6202556423356238e-05, "loss": 0.8415, "step": 9700 }, { "epoch": 0.19, "learning_rate": 1.6182981971930238e-05, "loss": 0.9671, "step": 9750 }, { "epoch": 0.19, "learning_rate": 1.616340752050424e-05, "loss": 0.8248, "step": 9800 }, { "epoch": 0.19, "learning_rate": 1.614383306907824e-05, "loss": 0.8498, "step": 9850 }, { "epoch": 0.19, "learning_rate": 1.6124258617652243e-05, "loss": 1.0332, "step": 9900 }, { "epoch": 0.19, "learning_rate": 1.6104684166226243e-05, "loss": 0.9052, "step": 9950 }, { "epoch": 0.2, "learning_rate": 1.6085109714800246e-05, "loss": 0.9069, "step": 10000 }, { "epoch": 0.2, "learning_rate": 1.6065535263374246e-05, "loss": 0.8651, "step": 10050 }, { "epoch": 0.2, "learning_rate": 1.6045960811948245e-05, "loss": 0.9262, "step": 10100 }, { "epoch": 0.2, "learning_rate": 1.602638636052225e-05, "loss": 0.9556, "step": 10150 }, { "epoch": 0.2, "learning_rate": 1.6006811909096248e-05, "loss": 0.8846, "step": 10200 }, { "epoch": 0.2, "learning_rate": 1.598723745767025e-05, "loss": 0.8435, "step": 10250 }, { "epoch": 0.2, "learning_rate": 1.596766300624425e-05, "loss": 0.9585, "step": 10300 }, { "epoch": 0.2, "learning_rate": 1.5948088554818254e-05, "loss": 0.8239, "step": 10350 }, { "epoch": 0.2, "learning_rate": 1.5928514103392253e-05, "loss": 0.811, "step": 10400 }, { "epoch": 0.2, "learning_rate": 1.5908939651966256e-05, "loss": 0.8884, "step": 10450 }, { "epoch": 0.21, "learning_rate": 1.5889365200540256e-05, "loss": 0.8534, "step": 10500 }, { "epoch": 0.21, "learning_rate": 1.586979074911426e-05, "loss": 0.9667, "step": 10550 }, { "epoch": 0.21, "learning_rate": 1.585021629768826e-05, "loss": 0.9295, "step": 10600 }, { "epoch": 0.21, "learning_rate": 1.5830641846262262e-05, "loss": 0.9055, "step": 10650 }, { "epoch": 0.21, "learning_rate": 1.581106739483626e-05, "loss": 0.8377, "step": 10700 }, { "epoch": 0.21, "learning_rate": 1.579149294341026e-05, "loss": 0.9062, "step": 10750 }, { "epoch": 0.21, "learning_rate": 1.5771918491984264e-05, "loss": 0.8655, "step": 10800 }, { "epoch": 0.21, "learning_rate": 1.5752344040558264e-05, "loss": 0.8602, "step": 10850 }, { "epoch": 0.21, "learning_rate": 1.5732769589132267e-05, "loss": 0.8707, "step": 10900 }, { "epoch": 0.21, "learning_rate": 1.5713195137706266e-05, "loss": 0.8257, "step": 10950 }, { "epoch": 0.22, "learning_rate": 1.569362068628027e-05, "loss": 0.8605, "step": 11000 }, { "epoch": 0.22, "learning_rate": 1.567404623485427e-05, "loss": 0.7885, "step": 11050 }, { "epoch": 0.22, "learning_rate": 1.5654471783428272e-05, "loss": 0.8821, "step": 11100 }, { "epoch": 0.22, "learning_rate": 1.5634897332002272e-05, "loss": 0.8238, "step": 11150 }, { "epoch": 0.22, "learning_rate": 1.5615322880576275e-05, "loss": 0.9978, "step": 11200 }, { "epoch": 0.22, "learning_rate": 1.5595748429150274e-05, "loss": 0.8357, "step": 11250 }, { "epoch": 0.22, "learning_rate": 1.5576173977724278e-05, "loss": 0.9715, "step": 11300 }, { "epoch": 0.22, "learning_rate": 1.5556599526298277e-05, "loss": 0.8519, "step": 11350 }, { "epoch": 0.22, "learning_rate": 1.5537025074872277e-05, "loss": 0.7997, "step": 11400 }, { "epoch": 0.22, "learning_rate": 1.551745062344628e-05, "loss": 0.7839, "step": 11450 }, { "epoch": 0.23, "learning_rate": 1.549787617202028e-05, "loss": 0.7902, "step": 11500 }, { "epoch": 0.23, "learning_rate": 1.5478301720594283e-05, "loss": 0.9108, "step": 11550 }, { "epoch": 0.23, "learning_rate": 1.5458727269168282e-05, "loss": 0.9531, "step": 11600 }, { "epoch": 0.23, "learning_rate": 1.5439152817742285e-05, "loss": 0.9653, "step": 11650 }, { "epoch": 0.23, "learning_rate": 1.5419578366316285e-05, "loss": 0.8278, "step": 11700 }, { "epoch": 0.23, "learning_rate": 1.5400003914890288e-05, "loss": 0.9261, "step": 11750 }, { "epoch": 0.23, "learning_rate": 1.5380429463464288e-05, "loss": 0.8742, "step": 11800 }, { "epoch": 0.23, "learning_rate": 1.536085501203829e-05, "loss": 0.8484, "step": 11850 }, { "epoch": 0.23, "learning_rate": 1.534128056061229e-05, "loss": 0.9005, "step": 11900 }, { "epoch": 0.23, "learning_rate": 1.5321706109186293e-05, "loss": 0.9226, "step": 11950 }, { "epoch": 0.23, "learning_rate": 1.5302131657760293e-05, "loss": 0.8965, "step": 12000 }, { "epoch": 0.24, "learning_rate": 1.5282557206334293e-05, "loss": 0.9745, "step": 12050 }, { "epoch": 0.24, "learning_rate": 1.5262982754908296e-05, "loss": 0.9371, "step": 12100 }, { "epoch": 0.24, "learning_rate": 1.5243408303482297e-05, "loss": 0.8983, "step": 12150 }, { "epoch": 0.24, "learning_rate": 1.5223833852056298e-05, "loss": 0.9373, "step": 12200 }, { "epoch": 0.24, "learning_rate": 1.52042594006303e-05, "loss": 0.854, "step": 12250 }, { "epoch": 0.24, "learning_rate": 1.5184684949204301e-05, "loss": 0.8189, "step": 12300 }, { "epoch": 0.24, "learning_rate": 1.5165110497778302e-05, "loss": 0.8548, "step": 12350 }, { "epoch": 0.24, "learning_rate": 1.5145536046352302e-05, "loss": 0.8075, "step": 12400 }, { "epoch": 0.24, "learning_rate": 1.5125961594926303e-05, "loss": 0.8479, "step": 12450 }, { "epoch": 0.24, "learning_rate": 1.5106387143500305e-05, "loss": 0.7847, "step": 12500 }, { "epoch": 0.25, "learning_rate": 1.5086812692074306e-05, "loss": 0.859, "step": 12550 }, { "epoch": 0.25, "learning_rate": 1.5067238240648307e-05, "loss": 0.9378, "step": 12600 }, { "epoch": 0.25, "learning_rate": 1.5047663789222309e-05, "loss": 0.911, "step": 12650 }, { "epoch": 0.25, "learning_rate": 1.502808933779631e-05, "loss": 0.9074, "step": 12700 }, { "epoch": 0.25, "learning_rate": 1.5008514886370311e-05, "loss": 0.8419, "step": 12750 }, { "epoch": 0.25, "learning_rate": 1.4988940434944313e-05, "loss": 0.8942, "step": 12800 }, { "epoch": 0.25, "learning_rate": 1.4969365983518314e-05, "loss": 0.8709, "step": 12850 }, { "epoch": 0.25, "learning_rate": 1.4949791532092315e-05, "loss": 0.9247, "step": 12900 }, { "epoch": 0.25, "learning_rate": 1.4930217080666317e-05, "loss": 0.9094, "step": 12950 }, { "epoch": 0.25, "learning_rate": 1.4910642629240318e-05, "loss": 0.8503, "step": 13000 }, { "epoch": 0.26, "learning_rate": 1.4891068177814318e-05, "loss": 0.9849, "step": 13050 }, { "epoch": 0.26, "learning_rate": 1.4871493726388319e-05, "loss": 0.9683, "step": 13100 }, { "epoch": 0.26, "learning_rate": 1.485191927496232e-05, "loss": 0.9421, "step": 13150 }, { "epoch": 0.26, "learning_rate": 1.4832344823536322e-05, "loss": 0.8789, "step": 13200 }, { "epoch": 0.26, "learning_rate": 1.4812770372110323e-05, "loss": 0.8922, "step": 13250 }, { "epoch": 0.26, "learning_rate": 1.4793195920684324e-05, "loss": 0.8455, "step": 13300 }, { "epoch": 0.26, "learning_rate": 1.4773621469258326e-05, "loss": 0.8357, "step": 13350 }, { "epoch": 0.26, "learning_rate": 1.4754047017832327e-05, "loss": 0.8953, "step": 13400 }, { "epoch": 0.26, "learning_rate": 1.4734472566406328e-05, "loss": 0.8378, "step": 13450 }, { "epoch": 0.26, "learning_rate": 1.471489811498033e-05, "loss": 0.8787, "step": 13500 }, { "epoch": 0.27, "learning_rate": 1.4695323663554331e-05, "loss": 0.8001, "step": 13550 }, { "epoch": 0.27, "learning_rate": 1.4675749212128332e-05, "loss": 0.9104, "step": 13600 }, { "epoch": 0.27, "learning_rate": 1.4656174760702334e-05, "loss": 0.8789, "step": 13650 }, { "epoch": 0.27, "learning_rate": 1.4636600309276333e-05, "loss": 0.8946, "step": 13700 }, { "epoch": 0.27, "learning_rate": 1.4617025857850335e-05, "loss": 0.9366, "step": 13750 }, { "epoch": 0.27, "learning_rate": 1.4597451406424336e-05, "loss": 0.9516, "step": 13800 }, { "epoch": 0.27, "learning_rate": 1.4577876954998337e-05, "loss": 0.9444, "step": 13850 }, { "epoch": 0.27, "learning_rate": 1.4558302503572339e-05, "loss": 0.8554, "step": 13900 }, { "epoch": 0.27, "learning_rate": 1.453872805214634e-05, "loss": 0.9004, "step": 13950 }, { "epoch": 0.27, "learning_rate": 1.4519153600720341e-05, "loss": 0.7618, "step": 14000 }, { "epoch": 0.28, "learning_rate": 1.4499579149294343e-05, "loss": 0.8806, "step": 14050 }, { "epoch": 0.28, "learning_rate": 1.4480004697868344e-05, "loss": 0.901, "step": 14100 }, { "epoch": 0.28, "learning_rate": 1.4460430246442345e-05, "loss": 0.8916, "step": 14150 }, { "epoch": 0.28, "learning_rate": 1.4440855795016347e-05, "loss": 0.8715, "step": 14200 }, { "epoch": 0.28, "learning_rate": 1.4421281343590348e-05, "loss": 0.8867, "step": 14250 }, { "epoch": 0.28, "learning_rate": 1.440170689216435e-05, "loss": 0.8262, "step": 14300 }, { "epoch": 0.28, "learning_rate": 1.438213244073835e-05, "loss": 0.8875, "step": 14350 }, { "epoch": 0.28, "learning_rate": 1.436255798931235e-05, "loss": 0.8602, "step": 14400 }, { "epoch": 0.28, "learning_rate": 1.4342983537886352e-05, "loss": 0.9087, "step": 14450 }, { "epoch": 0.28, "learning_rate": 1.4323409086460353e-05, "loss": 0.8778, "step": 14500 }, { "epoch": 0.28, "learning_rate": 1.4303834635034355e-05, "loss": 0.8652, "step": 14550 }, { "epoch": 0.29, "learning_rate": 1.4284260183608356e-05, "loss": 0.8563, "step": 14600 }, { "epoch": 0.29, "learning_rate": 1.4264685732182357e-05, "loss": 0.8385, "step": 14650 }, { "epoch": 0.29, "learning_rate": 1.4245111280756359e-05, "loss": 0.8856, "step": 14700 }, { "epoch": 0.29, "learning_rate": 1.422553682933036e-05, "loss": 0.8771, "step": 14750 }, { "epoch": 0.29, "learning_rate": 1.4205962377904361e-05, "loss": 0.8611, "step": 14800 }, { "epoch": 0.29, "learning_rate": 1.4186387926478363e-05, "loss": 0.9059, "step": 14850 }, { "epoch": 0.29, "learning_rate": 1.4166813475052364e-05, "loss": 0.9306, "step": 14900 }, { "epoch": 0.29, "learning_rate": 1.4147239023626365e-05, "loss": 0.8578, "step": 14950 }, { "epoch": 0.29, "learning_rate": 1.4127664572200367e-05, "loss": 0.8826, "step": 15000 }, { "epoch": 0.29, "learning_rate": 1.4108090120774366e-05, "loss": 0.9013, "step": 15050 }, { "epoch": 0.3, "learning_rate": 1.4088515669348368e-05, "loss": 0.8291, "step": 15100 }, { "epoch": 0.3, "learning_rate": 1.4068941217922369e-05, "loss": 0.9133, "step": 15150 }, { "epoch": 0.3, "learning_rate": 1.404936676649637e-05, "loss": 0.7908, "step": 15200 }, { "epoch": 0.3, "learning_rate": 1.4029792315070372e-05, "loss": 0.8334, "step": 15250 }, { "epoch": 0.3, "learning_rate": 1.4010217863644373e-05, "loss": 0.8911, "step": 15300 }, { "epoch": 0.3, "learning_rate": 1.3990643412218374e-05, "loss": 0.8854, "step": 15350 }, { "epoch": 0.3, "learning_rate": 1.3971068960792376e-05, "loss": 0.9154, "step": 15400 }, { "epoch": 0.3, "learning_rate": 1.3951494509366377e-05, "loss": 0.891, "step": 15450 }, { "epoch": 0.3, "learning_rate": 1.3931920057940378e-05, "loss": 0.7739, "step": 15500 }, { "epoch": 0.3, "learning_rate": 1.391234560651438e-05, "loss": 0.8708, "step": 15550 }, { "epoch": 0.31, "learning_rate": 1.3892771155088381e-05, "loss": 0.7749, "step": 15600 }, { "epoch": 0.31, "learning_rate": 1.3873196703662382e-05, "loss": 0.9595, "step": 15650 }, { "epoch": 0.31, "learning_rate": 1.3853622252236382e-05, "loss": 0.8669, "step": 15700 }, { "epoch": 0.31, "learning_rate": 1.3834047800810383e-05, "loss": 0.9046, "step": 15750 }, { "epoch": 0.31, "learning_rate": 1.3814473349384385e-05, "loss": 0.8351, "step": 15800 }, { "epoch": 0.31, "learning_rate": 1.3794898897958386e-05, "loss": 0.9234, "step": 15850 }, { "epoch": 0.31, "learning_rate": 1.3775324446532387e-05, "loss": 0.8685, "step": 15900 }, { "epoch": 0.31, "learning_rate": 1.3755749995106389e-05, "loss": 0.8342, "step": 15950 }, { "epoch": 0.31, "learning_rate": 1.373617554368039e-05, "loss": 0.8972, "step": 16000 }, { "epoch": 0.31, "learning_rate": 1.3716601092254391e-05, "loss": 0.8457, "step": 16050 }, { "epoch": 0.32, "learning_rate": 1.3697026640828393e-05, "loss": 0.8288, "step": 16100 }, { "epoch": 0.32, "learning_rate": 1.3677452189402394e-05, "loss": 0.966, "step": 16150 }, { "epoch": 0.32, "learning_rate": 1.3657877737976395e-05, "loss": 0.9036, "step": 16200 }, { "epoch": 0.32, "learning_rate": 1.3638303286550397e-05, "loss": 0.8774, "step": 16250 }, { "epoch": 0.32, "learning_rate": 1.3618728835124398e-05, "loss": 0.9368, "step": 16300 }, { "epoch": 0.32, "learning_rate": 1.35991543836984e-05, "loss": 0.8784, "step": 16350 }, { "epoch": 0.32, "learning_rate": 1.3579579932272399e-05, "loss": 0.8421, "step": 16400 }, { "epoch": 0.32, "learning_rate": 1.35600054808464e-05, "loss": 0.8555, "step": 16450 }, { "epoch": 0.32, "learning_rate": 1.3540431029420402e-05, "loss": 0.8573, "step": 16500 }, { "epoch": 0.32, "learning_rate": 1.3520856577994403e-05, "loss": 0.8638, "step": 16550 }, { "epoch": 0.32, "learning_rate": 1.3501282126568404e-05, "loss": 0.924, "step": 16600 }, { "epoch": 0.33, "learning_rate": 1.3481707675142406e-05, "loss": 0.8371, "step": 16650 }, { "epoch": 0.33, "learning_rate": 1.3462133223716407e-05, "loss": 0.8086, "step": 16700 }, { "epoch": 0.33, "learning_rate": 1.3442558772290408e-05, "loss": 0.8677, "step": 16750 }, { "epoch": 0.33, "learning_rate": 1.342298432086441e-05, "loss": 0.8288, "step": 16800 }, { "epoch": 0.33, "learning_rate": 1.3403409869438411e-05, "loss": 0.7825, "step": 16850 }, { "epoch": 0.33, "learning_rate": 1.3383835418012412e-05, "loss": 0.7759, "step": 16900 }, { "epoch": 0.33, "learning_rate": 1.3364260966586414e-05, "loss": 0.9239, "step": 16950 }, { "epoch": 0.33, "learning_rate": 1.3344686515160415e-05, "loss": 0.8655, "step": 17000 }, { "epoch": 0.33, "learning_rate": 1.3325112063734415e-05, "loss": 0.8756, "step": 17050 }, { "epoch": 0.33, "learning_rate": 1.3305537612308416e-05, "loss": 0.8513, "step": 17100 }, { "epoch": 0.34, "learning_rate": 1.3285963160882417e-05, "loss": 0.8829, "step": 17150 }, { "epoch": 0.34, "learning_rate": 1.3266388709456419e-05, "loss": 0.7919, "step": 17200 }, { "epoch": 0.34, "learning_rate": 1.324681425803042e-05, "loss": 0.7929, "step": 17250 }, { "epoch": 0.34, "learning_rate": 1.3227239806604421e-05, "loss": 0.9112, "step": 17300 }, { "epoch": 0.34, "learning_rate": 1.3207665355178423e-05, "loss": 0.8695, "step": 17350 }, { "epoch": 0.34, "learning_rate": 1.3188090903752424e-05, "loss": 0.8948, "step": 17400 }, { "epoch": 0.34, "learning_rate": 1.3168516452326425e-05, "loss": 0.8628, "step": 17450 }, { "epoch": 0.34, "learning_rate": 1.3148942000900427e-05, "loss": 0.848, "step": 17500 }, { "epoch": 0.34, "learning_rate": 1.3129367549474428e-05, "loss": 0.9755, "step": 17550 }, { "epoch": 0.34, "learning_rate": 1.310979309804843e-05, "loss": 0.9757, "step": 17600 }, { "epoch": 0.35, "learning_rate": 1.3090218646622431e-05, "loss": 0.873, "step": 17650 }, { "epoch": 0.35, "learning_rate": 1.307064419519643e-05, "loss": 0.8937, "step": 17700 }, { "epoch": 0.35, "learning_rate": 1.3051069743770432e-05, "loss": 0.8326, "step": 17750 }, { "epoch": 0.35, "learning_rate": 1.3031495292344433e-05, "loss": 0.8749, "step": 17800 }, { "epoch": 0.35, "learning_rate": 1.3011920840918435e-05, "loss": 0.8842, "step": 17850 }, { "epoch": 0.35, "learning_rate": 1.2992346389492436e-05, "loss": 0.8405, "step": 17900 }, { "epoch": 0.35, "learning_rate": 1.2972771938066437e-05, "loss": 0.8236, "step": 17950 }, { "epoch": 0.35, "learning_rate": 1.2953197486640439e-05, "loss": 0.9123, "step": 18000 }, { "epoch": 0.35, "learning_rate": 1.293362303521444e-05, "loss": 0.868, "step": 18050 }, { "epoch": 0.35, "learning_rate": 1.2914048583788441e-05, "loss": 0.8031, "step": 18100 }, { "epoch": 0.36, "learning_rate": 1.2894474132362443e-05, "loss": 0.855, "step": 18150 }, { "epoch": 0.36, "learning_rate": 1.2874899680936444e-05, "loss": 0.8214, "step": 18200 }, { "epoch": 0.36, "learning_rate": 1.2855325229510445e-05, "loss": 0.7972, "step": 18250 }, { "epoch": 0.36, "learning_rate": 1.2835750778084447e-05, "loss": 0.9112, "step": 18300 }, { "epoch": 0.36, "learning_rate": 1.2816176326658448e-05, "loss": 0.9455, "step": 18350 }, { "epoch": 0.36, "learning_rate": 1.2796601875232448e-05, "loss": 0.8682, "step": 18400 }, { "epoch": 0.36, "learning_rate": 1.2777027423806449e-05, "loss": 0.8356, "step": 18450 }, { "epoch": 0.36, "learning_rate": 1.275745297238045e-05, "loss": 0.9152, "step": 18500 }, { "epoch": 0.36, "learning_rate": 1.2737878520954452e-05, "loss": 0.8404, "step": 18550 }, { "epoch": 0.36, "learning_rate": 1.2718304069528453e-05, "loss": 0.8722, "step": 18600 }, { "epoch": 0.37, "learning_rate": 1.2698729618102454e-05, "loss": 0.7857, "step": 18650 }, { "epoch": 0.37, "learning_rate": 1.2679155166676456e-05, "loss": 0.9105, "step": 18700 }, { "epoch": 0.37, "learning_rate": 1.2659580715250457e-05, "loss": 0.8208, "step": 18750 }, { "epoch": 0.37, "learning_rate": 1.2640006263824458e-05, "loss": 0.8793, "step": 18800 }, { "epoch": 0.37, "learning_rate": 1.262043181239846e-05, "loss": 0.8134, "step": 18850 }, { "epoch": 0.37, "learning_rate": 1.2600857360972461e-05, "loss": 0.9198, "step": 18900 }, { "epoch": 0.37, "learning_rate": 1.2581282909546462e-05, "loss": 0.8148, "step": 18950 }, { "epoch": 0.37, "learning_rate": 1.2561708458120464e-05, "loss": 0.8317, "step": 19000 }, { "epoch": 0.37, "learning_rate": 1.2542134006694463e-05, "loss": 0.8664, "step": 19050 }, { "epoch": 0.37, "learning_rate": 1.2522559555268465e-05, "loss": 0.9378, "step": 19100 }, { "epoch": 0.37, "learning_rate": 1.2502985103842466e-05, "loss": 0.8184, "step": 19150 }, { "epoch": 0.38, "learning_rate": 1.2483410652416467e-05, "loss": 0.8996, "step": 19200 }, { "epoch": 0.38, "learning_rate": 1.2463836200990469e-05, "loss": 0.9632, "step": 19250 }, { "epoch": 0.38, "learning_rate": 1.244426174956447e-05, "loss": 0.8904, "step": 19300 }, { "epoch": 0.38, "learning_rate": 1.2424687298138471e-05, "loss": 0.8046, "step": 19350 }, { "epoch": 0.38, "learning_rate": 1.2405112846712473e-05, "loss": 0.8562, "step": 19400 }, { "epoch": 0.38, "learning_rate": 1.2385538395286474e-05, "loss": 0.8907, "step": 19450 }, { "epoch": 0.38, "learning_rate": 1.2365963943860475e-05, "loss": 0.8435, "step": 19500 }, { "epoch": 0.38, "learning_rate": 1.2346389492434477e-05, "loss": 0.8589, "step": 19550 }, { "epoch": 0.38, "learning_rate": 1.2326815041008478e-05, "loss": 0.9204, "step": 19600 }, { "epoch": 0.38, "learning_rate": 1.230724058958248e-05, "loss": 0.886, "step": 19650 }, { "epoch": 0.39, "learning_rate": 1.228766613815648e-05, "loss": 0.8723, "step": 19700 }, { "epoch": 0.39, "learning_rate": 1.226809168673048e-05, "loss": 0.9037, "step": 19750 }, { "epoch": 0.39, "learning_rate": 1.2248517235304482e-05, "loss": 0.8555, "step": 19800 }, { "epoch": 0.39, "learning_rate": 1.2228942783878483e-05, "loss": 0.8601, "step": 19850 }, { "epoch": 0.39, "learning_rate": 1.2209368332452484e-05, "loss": 0.818, "step": 19900 }, { "epoch": 0.39, "learning_rate": 1.2189793881026486e-05, "loss": 0.8717, "step": 19950 }, { "epoch": 0.39, "learning_rate": 1.2170219429600487e-05, "loss": 0.8883, "step": 20000 }, { "epoch": 0.39, "learning_rate": 1.2150644978174488e-05, "loss": 0.814, "step": 20050 }, { "epoch": 0.39, "learning_rate": 1.213107052674849e-05, "loss": 0.9047, "step": 20100 }, { "epoch": 0.39, "learning_rate": 1.2111496075322491e-05, "loss": 0.9227, "step": 20150 }, { "epoch": 0.4, "learning_rate": 1.2091921623896492e-05, "loss": 0.971, "step": 20200 }, { "epoch": 0.4, "learning_rate": 1.2072347172470494e-05, "loss": 0.8127, "step": 20250 }, { "epoch": 0.4, "learning_rate": 1.2052772721044495e-05, "loss": 0.8432, "step": 20300 }, { "epoch": 0.4, "learning_rate": 1.2033198269618496e-05, "loss": 0.8525, "step": 20350 }, { "epoch": 0.4, "learning_rate": 1.2013623818192496e-05, "loss": 0.8678, "step": 20400 }, { "epoch": 0.4, "learning_rate": 1.1994049366766497e-05, "loss": 0.8416, "step": 20450 }, { "epoch": 0.4, "learning_rate": 1.1974474915340499e-05, "loss": 0.8313, "step": 20500 }, { "epoch": 0.4, "learning_rate": 1.19549004639145e-05, "loss": 0.8867, "step": 20550 }, { "epoch": 0.4, "learning_rate": 1.1935326012488501e-05, "loss": 0.8573, "step": 20600 }, { "epoch": 0.4, "learning_rate": 1.1915751561062503e-05, "loss": 0.8349, "step": 20650 }, { "epoch": 0.41, "learning_rate": 1.1896177109636504e-05, "loss": 0.8767, "step": 20700 }, { "epoch": 0.41, "learning_rate": 1.1876602658210506e-05, "loss": 0.9189, "step": 20750 }, { "epoch": 0.41, "learning_rate": 1.1857028206784507e-05, "loss": 0.8946, "step": 20800 }, { "epoch": 0.41, "learning_rate": 1.1837453755358508e-05, "loss": 0.7432, "step": 20850 }, { "epoch": 0.41, "learning_rate": 1.181787930393251e-05, "loss": 0.9358, "step": 20900 }, { "epoch": 0.41, "learning_rate": 1.1798304852506511e-05, "loss": 0.8968, "step": 20950 }, { "epoch": 0.41, "learning_rate": 1.1778730401080512e-05, "loss": 0.7498, "step": 21000 }, { "epoch": 0.41, "learning_rate": 1.1759155949654512e-05, "loss": 0.8436, "step": 21050 }, { "epoch": 0.41, "learning_rate": 1.1739581498228513e-05, "loss": 0.8336, "step": 21100 }, { "epoch": 0.41, "learning_rate": 1.1720007046802515e-05, "loss": 0.8154, "step": 21150 }, { "epoch": 0.41, "learning_rate": 1.1700432595376516e-05, "loss": 0.8352, "step": 21200 }, { "epoch": 0.42, "learning_rate": 1.1680858143950517e-05, "loss": 0.84, "step": 21250 }, { "epoch": 0.42, "learning_rate": 1.1661283692524519e-05, "loss": 0.9231, "step": 21300 }, { "epoch": 0.42, "learning_rate": 1.164170924109852e-05, "loss": 0.8819, "step": 21350 }, { "epoch": 0.42, "learning_rate": 1.1622134789672521e-05, "loss": 0.8829, "step": 21400 }, { "epoch": 0.42, "learning_rate": 1.1602560338246523e-05, "loss": 0.9224, "step": 21450 }, { "epoch": 0.42, "learning_rate": 1.1582985886820524e-05, "loss": 0.8652, "step": 21500 }, { "epoch": 0.42, "learning_rate": 1.1563411435394525e-05, "loss": 0.8843, "step": 21550 }, { "epoch": 0.42, "learning_rate": 1.1543836983968527e-05, "loss": 0.8513, "step": 21600 }, { "epoch": 0.42, "learning_rate": 1.1524262532542528e-05, "loss": 0.9332, "step": 21650 }, { "epoch": 0.42, "learning_rate": 1.150468808111653e-05, "loss": 0.8782, "step": 21700 }, { "epoch": 0.43, "learning_rate": 1.1485113629690529e-05, "loss": 0.8213, "step": 21750 }, { "epoch": 0.43, "learning_rate": 1.146553917826453e-05, "loss": 0.9054, "step": 21800 }, { "epoch": 0.43, "learning_rate": 1.1445964726838532e-05, "loss": 0.913, "step": 21850 }, { "epoch": 0.43, "learning_rate": 1.1426390275412533e-05, "loss": 0.9431, "step": 21900 }, { "epoch": 0.43, "learning_rate": 1.1406815823986534e-05, "loss": 0.889, "step": 21950 }, { "epoch": 0.43, "learning_rate": 1.1387241372560536e-05, "loss": 0.8428, "step": 22000 }, { "epoch": 0.43, "learning_rate": 1.1367666921134537e-05, "loss": 0.8754, "step": 22050 }, { "epoch": 0.43, "learning_rate": 1.1348092469708538e-05, "loss": 0.8986, "step": 22100 }, { "epoch": 0.43, "learning_rate": 1.132851801828254e-05, "loss": 0.7835, "step": 22150 }, { "epoch": 0.43, "learning_rate": 1.1308943566856541e-05, "loss": 0.9178, "step": 22200 }, { "epoch": 0.44, "learning_rate": 1.1289369115430542e-05, "loss": 0.8806, "step": 22250 }, { "epoch": 0.44, "learning_rate": 1.1269794664004544e-05, "loss": 0.8896, "step": 22300 }, { "epoch": 0.44, "learning_rate": 1.1250220212578545e-05, "loss": 0.8652, "step": 22350 }, { "epoch": 0.44, "learning_rate": 1.1230645761152545e-05, "loss": 0.959, "step": 22400 }, { "epoch": 0.44, "learning_rate": 1.1211071309726546e-05, "loss": 0.9101, "step": 22450 }, { "epoch": 0.44, "learning_rate": 1.1191496858300547e-05, "loss": 0.9111, "step": 22500 }, { "epoch": 0.44, "learning_rate": 1.1171922406874549e-05, "loss": 0.8679, "step": 22550 }, { "epoch": 0.44, "learning_rate": 1.115234795544855e-05, "loss": 0.9192, "step": 22600 }, { "epoch": 0.44, "learning_rate": 1.1132773504022551e-05, "loss": 0.869, "step": 22650 }, { "epoch": 0.44, "learning_rate": 1.1113199052596553e-05, "loss": 0.8669, "step": 22700 }, { "epoch": 0.45, "learning_rate": 1.1093624601170554e-05, "loss": 0.7818, "step": 22750 }, { "epoch": 0.45, "learning_rate": 1.1074050149744555e-05, "loss": 0.8594, "step": 22800 }, { "epoch": 0.45, "learning_rate": 1.1054475698318557e-05, "loss": 0.8431, "step": 22850 }, { "epoch": 0.45, "learning_rate": 1.1034901246892558e-05, "loss": 0.8806, "step": 22900 }, { "epoch": 0.45, "learning_rate": 1.101532679546656e-05, "loss": 0.9083, "step": 22950 }, { "epoch": 0.45, "learning_rate": 1.099575234404056e-05, "loss": 0.8489, "step": 23000 }, { "epoch": 0.45, "learning_rate": 1.097617789261456e-05, "loss": 0.9215, "step": 23050 }, { "epoch": 0.45, "learning_rate": 1.0956603441188562e-05, "loss": 0.8501, "step": 23100 }, { "epoch": 0.45, "learning_rate": 1.0937028989762563e-05, "loss": 0.8669, "step": 23150 }, { "epoch": 0.45, "learning_rate": 1.0917454538336564e-05, "loss": 0.8635, "step": 23200 }, { "epoch": 0.46, "learning_rate": 1.0897880086910566e-05, "loss": 0.8314, "step": 23250 }, { "epoch": 0.46, "learning_rate": 1.0878305635484567e-05, "loss": 0.9476, "step": 23300 }, { "epoch": 0.46, "learning_rate": 1.0858731184058568e-05, "loss": 0.9073, "step": 23350 }, { "epoch": 0.46, "learning_rate": 1.083915673263257e-05, "loss": 0.8649, "step": 23400 }, { "epoch": 0.46, "learning_rate": 1.0819582281206571e-05, "loss": 0.8369, "step": 23450 }, { "epoch": 0.46, "learning_rate": 1.0800007829780572e-05, "loss": 0.8857, "step": 23500 }, { "epoch": 0.46, "learning_rate": 1.0780433378354574e-05, "loss": 0.7556, "step": 23550 }, { "epoch": 0.46, "learning_rate": 1.0760858926928575e-05, "loss": 0.8351, "step": 23600 }, { "epoch": 0.46, "learning_rate": 1.0741284475502576e-05, "loss": 0.8619, "step": 23650 }, { "epoch": 0.46, "learning_rate": 1.0721710024076578e-05, "loss": 0.8475, "step": 23700 }, { "epoch": 0.46, "learning_rate": 1.0702135572650577e-05, "loss": 0.8963, "step": 23750 }, { "epoch": 0.47, "learning_rate": 1.0682561121224579e-05, "loss": 0.8072, "step": 23800 }, { "epoch": 0.47, "learning_rate": 1.066298666979858e-05, "loss": 0.7855, "step": 23850 }, { "epoch": 0.47, "learning_rate": 1.0643412218372582e-05, "loss": 0.9233, "step": 23900 }, { "epoch": 0.47, "learning_rate": 1.0623837766946583e-05, "loss": 0.8856, "step": 23950 }, { "epoch": 0.47, "learning_rate": 1.0604263315520584e-05, "loss": 0.8831, "step": 24000 }, { "epoch": 0.47, "learning_rate": 1.0584688864094586e-05, "loss": 0.7711, "step": 24050 }, { "epoch": 0.47, "learning_rate": 1.0565114412668587e-05, "loss": 0.8804, "step": 24100 }, { "epoch": 0.47, "learning_rate": 1.0545539961242588e-05, "loss": 0.8166, "step": 24150 }, { "epoch": 0.47, "learning_rate": 1.052596550981659e-05, "loss": 0.88, "step": 24200 }, { "epoch": 0.47, "learning_rate": 1.0506391058390591e-05, "loss": 0.8698, "step": 24250 }, { "epoch": 0.48, "learning_rate": 1.0486816606964592e-05, "loss": 0.8684, "step": 24300 }, { "epoch": 0.48, "learning_rate": 1.0467242155538594e-05, "loss": 0.93, "step": 24350 }, { "epoch": 0.48, "learning_rate": 1.0447667704112593e-05, "loss": 0.8597, "step": 24400 }, { "epoch": 0.48, "learning_rate": 1.0428093252686595e-05, "loss": 0.9301, "step": 24450 }, { "epoch": 0.48, "learning_rate": 1.0408518801260596e-05, "loss": 0.8598, "step": 24500 }, { "epoch": 0.48, "learning_rate": 1.0388944349834597e-05, "loss": 0.9186, "step": 24550 }, { "epoch": 0.48, "learning_rate": 1.0369369898408599e-05, "loss": 0.8626, "step": 24600 }, { "epoch": 0.48, "learning_rate": 1.03497954469826e-05, "loss": 0.8402, "step": 24650 }, { "epoch": 0.48, "learning_rate": 1.0330220995556601e-05, "loss": 0.8344, "step": 24700 }, { "epoch": 0.48, "learning_rate": 1.0310646544130603e-05, "loss": 0.7338, "step": 24750 }, { "epoch": 0.49, "learning_rate": 1.0291072092704604e-05, "loss": 0.924, "step": 24800 }, { "epoch": 0.49, "learning_rate": 1.0271497641278605e-05, "loss": 0.8656, "step": 24850 }, { "epoch": 0.49, "learning_rate": 1.0251923189852607e-05, "loss": 0.8263, "step": 24900 }, { "epoch": 0.49, "learning_rate": 1.0232348738426608e-05, "loss": 0.8556, "step": 24950 }, { "epoch": 0.49, "learning_rate": 1.021277428700061e-05, "loss": 0.8331, "step": 25000 }, { "epoch": 0.49, "learning_rate": 1.0193199835574609e-05, "loss": 0.7991, "step": 25050 }, { "epoch": 0.49, "learning_rate": 1.017362538414861e-05, "loss": 0.7974, "step": 25100 }, { "epoch": 0.49, "learning_rate": 1.0154050932722612e-05, "loss": 0.8658, "step": 25150 }, { "epoch": 0.49, "learning_rate": 1.0134476481296613e-05, "loss": 0.8685, "step": 25200 }, { "epoch": 0.49, "learning_rate": 1.0114902029870614e-05, "loss": 0.9491, "step": 25250 }, { "epoch": 0.5, "learning_rate": 1.0095327578444616e-05, "loss": 0.8939, "step": 25300 }, { "epoch": 0.5, "learning_rate": 1.0075753127018617e-05, "loss": 0.8762, "step": 25350 }, { "epoch": 0.5, "learning_rate": 1.0056178675592618e-05, "loss": 0.8753, "step": 25400 }, { "epoch": 0.5, "learning_rate": 1.003660422416662e-05, "loss": 0.7322, "step": 25450 }, { "epoch": 0.5, "learning_rate": 1.0017029772740621e-05, "loss": 0.9321, "step": 25500 }, { "epoch": 0.5, "learning_rate": 9.99745532131462e-06, "loss": 0.8506, "step": 25550 }, { "epoch": 0.5, "learning_rate": 9.977880869888622e-06, "loss": 0.8747, "step": 25600 }, { "epoch": 0.5, "learning_rate": 9.958306418462623e-06, "loss": 0.8376, "step": 25650 }, { "epoch": 0.5, "learning_rate": 9.938731967036625e-06, "loss": 0.8633, "step": 25700 }, { "epoch": 0.5, "learning_rate": 9.919157515610626e-06, "loss": 0.843, "step": 25750 }, { "epoch": 0.51, "learning_rate": 9.899583064184627e-06, "loss": 0.9029, "step": 25800 }, { "epoch": 0.51, "learning_rate": 9.880008612758629e-06, "loss": 0.9153, "step": 25850 }, { "epoch": 0.51, "learning_rate": 9.86043416133263e-06, "loss": 0.8513, "step": 25900 }, { "epoch": 0.51, "learning_rate": 9.84085970990663e-06, "loss": 0.8539, "step": 25950 }, { "epoch": 0.51, "learning_rate": 9.821285258480631e-06, "loss": 0.8501, "step": 26000 }, { "epoch": 0.51, "learning_rate": 9.801710807054632e-06, "loss": 0.931, "step": 26050 }, { "epoch": 0.51, "learning_rate": 9.782136355628634e-06, "loss": 0.8488, "step": 26100 }, { "epoch": 0.51, "learning_rate": 9.762561904202635e-06, "loss": 0.9098, "step": 26150 }, { "epoch": 0.51, "learning_rate": 9.742987452776636e-06, "loss": 0.8358, "step": 26200 }, { "epoch": 0.51, "learning_rate": 9.723413001350638e-06, "loss": 0.8837, "step": 26250 }, { "epoch": 0.51, "learning_rate": 9.703838549924639e-06, "loss": 0.7796, "step": 26300 }, { "epoch": 0.52, "learning_rate": 9.68426409849864e-06, "loss": 0.8227, "step": 26350 }, { "epoch": 0.52, "learning_rate": 9.664689647072642e-06, "loss": 0.8286, "step": 26400 }, { "epoch": 0.52, "learning_rate": 9.645115195646643e-06, "loss": 0.9235, "step": 26450 }, { "epoch": 0.52, "learning_rate": 9.625540744220644e-06, "loss": 0.845, "step": 26500 }, { "epoch": 0.52, "learning_rate": 9.605966292794646e-06, "loss": 0.7694, "step": 26550 }, { "epoch": 0.52, "learning_rate": 9.586391841368647e-06, "loss": 0.9169, "step": 26600 }, { "epoch": 0.52, "learning_rate": 9.566817389942647e-06, "loss": 0.9425, "step": 26650 }, { "epoch": 0.52, "learning_rate": 9.547242938516648e-06, "loss": 0.7728, "step": 26700 }, { "epoch": 0.52, "learning_rate": 9.52766848709065e-06, "loss": 0.9274, "step": 26750 }, { "epoch": 0.52, "learning_rate": 9.50809403566465e-06, "loss": 0.8754, "step": 26800 }, { "epoch": 0.53, "learning_rate": 9.488519584238652e-06, "loss": 0.8206, "step": 26850 }, { "epoch": 0.53, "learning_rate": 9.468945132812653e-06, "loss": 0.8349, "step": 26900 }, { "epoch": 0.53, "learning_rate": 9.449370681386655e-06, "loss": 0.7908, "step": 26950 }, { "epoch": 0.53, "learning_rate": 9.429796229960656e-06, "loss": 0.776, "step": 27000 }, { "epoch": 0.53, "learning_rate": 9.410221778534658e-06, "loss": 0.8717, "step": 27050 }, { "epoch": 0.53, "learning_rate": 9.390647327108659e-06, "loss": 0.8673, "step": 27100 }, { "epoch": 0.53, "learning_rate": 9.37107287568266e-06, "loss": 0.8936, "step": 27150 }, { "epoch": 0.53, "learning_rate": 9.351498424256662e-06, "loss": 0.8782, "step": 27200 }, { "epoch": 0.53, "learning_rate": 9.331923972830663e-06, "loss": 0.7767, "step": 27250 }, { "epoch": 0.53, "learning_rate": 9.312349521404663e-06, "loss": 0.8948, "step": 27300 }, { "epoch": 0.54, "learning_rate": 9.292775069978664e-06, "loss": 0.8819, "step": 27350 }, { "epoch": 0.54, "learning_rate": 9.273200618552665e-06, "loss": 0.8244, "step": 27400 }, { "epoch": 0.54, "learning_rate": 9.253626167126667e-06, "loss": 0.8453, "step": 27450 }, { "epoch": 0.54, "learning_rate": 9.234051715700668e-06, "loss": 0.8921, "step": 27500 }, { "epoch": 0.54, "learning_rate": 9.21447726427467e-06, "loss": 0.7972, "step": 27550 }, { "epoch": 0.54, "learning_rate": 9.19490281284867e-06, "loss": 0.8651, "step": 27600 }, { "epoch": 0.54, "learning_rate": 9.175328361422672e-06, "loss": 0.9117, "step": 27650 }, { "epoch": 0.54, "learning_rate": 9.155753909996673e-06, "loss": 0.8853, "step": 27700 }, { "epoch": 0.54, "learning_rate": 9.136179458570675e-06, "loss": 0.8501, "step": 27750 }, { "epoch": 0.54, "learning_rate": 9.116605007144676e-06, "loss": 0.8758, "step": 27800 }, { "epoch": 0.55, "learning_rate": 9.097030555718677e-06, "loss": 0.846, "step": 27850 }, { "epoch": 0.55, "learning_rate": 9.077456104292679e-06, "loss": 0.9889, "step": 27900 }, { "epoch": 0.55, "learning_rate": 9.057881652866678e-06, "loss": 0.8388, "step": 27950 }, { "epoch": 0.55, "learning_rate": 9.03830720144068e-06, "loss": 0.8852, "step": 28000 }, { "epoch": 0.55, "learning_rate": 9.018732750014681e-06, "loss": 0.8383, "step": 28050 }, { "epoch": 0.55, "learning_rate": 8.999158298588682e-06, "loss": 0.8078, "step": 28100 }, { "epoch": 0.55, "learning_rate": 8.979583847162684e-06, "loss": 0.8822, "step": 28150 }, { "epoch": 0.55, "learning_rate": 8.960009395736685e-06, "loss": 0.8824, "step": 28200 }, { "epoch": 0.55, "learning_rate": 8.940434944310686e-06, "loss": 0.8647, "step": 28250 }, { "epoch": 0.55, "learning_rate": 8.920860492884688e-06, "loss": 0.8348, "step": 28300 }, { "epoch": 0.55, "learning_rate": 8.901286041458689e-06, "loss": 0.7925, "step": 28350 }, { "epoch": 0.56, "learning_rate": 8.88171159003269e-06, "loss": 0.9533, "step": 28400 }, { "epoch": 0.56, "learning_rate": 8.862137138606692e-06, "loss": 0.8684, "step": 28450 }, { "epoch": 0.56, "learning_rate": 8.842562687180693e-06, "loss": 0.8617, "step": 28500 }, { "epoch": 0.56, "learning_rate": 8.822988235754694e-06, "loss": 0.8674, "step": 28550 }, { "epoch": 0.56, "learning_rate": 8.803413784328696e-06, "loss": 0.8993, "step": 28600 }, { "epoch": 0.56, "learning_rate": 8.783839332902695e-06, "loss": 0.8651, "step": 28650 }, { "epoch": 0.56, "learning_rate": 8.764264881476697e-06, "loss": 0.9059, "step": 28700 }, { "epoch": 0.56, "learning_rate": 8.744690430050698e-06, "loss": 0.8669, "step": 28750 }, { "epoch": 0.56, "learning_rate": 8.7251159786247e-06, "loss": 0.8995, "step": 28800 }, { "epoch": 0.56, "learning_rate": 8.7055415271987e-06, "loss": 0.7663, "step": 28850 }, { "epoch": 0.57, "learning_rate": 8.685967075772702e-06, "loss": 0.9201, "step": 28900 }, { "epoch": 0.57, "learning_rate": 8.666392624346703e-06, "loss": 0.8251, "step": 28950 }, { "epoch": 0.57, "learning_rate": 8.646818172920705e-06, "loss": 0.8798, "step": 29000 }, { "epoch": 0.57, "learning_rate": 8.627243721494706e-06, "loss": 0.8492, "step": 29050 }, { "epoch": 0.57, "learning_rate": 8.607669270068707e-06, "loss": 0.8379, "step": 29100 }, { "epoch": 0.57, "learning_rate": 8.588094818642709e-06, "loss": 0.8279, "step": 29150 }, { "epoch": 0.57, "learning_rate": 8.56852036721671e-06, "loss": 0.9321, "step": 29200 }, { "epoch": 0.57, "learning_rate": 8.548945915790711e-06, "loss": 0.8259, "step": 29250 }, { "epoch": 0.57, "learning_rate": 8.529371464364711e-06, "loss": 0.945, "step": 29300 }, { "epoch": 0.57, "learning_rate": 8.509797012938712e-06, "loss": 0.8367, "step": 29350 }, { "epoch": 0.58, "learning_rate": 8.490222561512714e-06, "loss": 0.7596, "step": 29400 }, { "epoch": 0.58, "learning_rate": 8.470648110086715e-06, "loss": 0.9031, "step": 29450 }, { "epoch": 0.58, "learning_rate": 8.451073658660716e-06, "loss": 0.9436, "step": 29500 }, { "epoch": 0.58, "learning_rate": 8.431499207234718e-06, "loss": 0.8308, "step": 29550 }, { "epoch": 0.58, "learning_rate": 8.411924755808719e-06, "loss": 0.7965, "step": 29600 }, { "epoch": 0.58, "learning_rate": 8.39235030438272e-06, "loss": 0.8116, "step": 29650 }, { "epoch": 0.58, "learning_rate": 8.372775852956722e-06, "loss": 0.894, "step": 29700 }, { "epoch": 0.58, "learning_rate": 8.353201401530723e-06, "loss": 0.8788, "step": 29750 }, { "epoch": 0.58, "learning_rate": 8.333626950104724e-06, "loss": 0.8463, "step": 29800 }, { "epoch": 0.58, "learning_rate": 8.314052498678726e-06, "loss": 0.7681, "step": 29850 }, { "epoch": 0.59, "learning_rate": 8.294478047252727e-06, "loss": 0.8351, "step": 29900 }, { "epoch": 0.59, "learning_rate": 8.274903595826728e-06, "loss": 0.8549, "step": 29950 }, { "epoch": 0.59, "learning_rate": 8.255329144400728e-06, "loss": 0.9201, "step": 30000 }, { "epoch": 0.59, "learning_rate": 8.23575469297473e-06, "loss": 0.8351, "step": 30050 }, { "epoch": 0.59, "learning_rate": 8.21618024154873e-06, "loss": 0.8675, "step": 30100 }, { "epoch": 0.59, "learning_rate": 8.196605790122732e-06, "loss": 0.7949, "step": 30150 }, { "epoch": 0.59, "learning_rate": 8.177031338696733e-06, "loss": 0.8228, "step": 30200 }, { "epoch": 0.59, "learning_rate": 8.157456887270735e-06, "loss": 0.861, "step": 30250 }, { "epoch": 0.59, "learning_rate": 8.137882435844736e-06, "loss": 0.9081, "step": 30300 }, { "epoch": 0.59, "learning_rate": 8.118307984418738e-06, "loss": 0.7959, "step": 30350 }, { "epoch": 0.6, "learning_rate": 8.098733532992739e-06, "loss": 0.8647, "step": 30400 }, { "epoch": 0.6, "learning_rate": 8.07915908156674e-06, "loss": 0.8884, "step": 30450 }, { "epoch": 0.6, "learning_rate": 8.059584630140742e-06, "loss": 0.7761, "step": 30500 }, { "epoch": 0.6, "learning_rate": 8.040010178714743e-06, "loss": 0.9423, "step": 30550 }, { "epoch": 0.6, "learning_rate": 8.020435727288744e-06, "loss": 0.8885, "step": 30600 }, { "epoch": 0.6, "learning_rate": 8.000861275862744e-06, "loss": 0.9188, "step": 30650 }, { "epoch": 0.6, "learning_rate": 7.981286824436745e-06, "loss": 0.7685, "step": 30700 }, { "epoch": 0.6, "learning_rate": 7.961712373010747e-06, "loss": 0.8876, "step": 30750 }, { "epoch": 0.6, "learning_rate": 7.942137921584748e-06, "loss": 0.8314, "step": 30800 }, { "epoch": 0.6, "learning_rate": 7.92256347015875e-06, "loss": 0.963, "step": 30850 }, { "epoch": 0.6, "learning_rate": 7.90298901873275e-06, "loss": 0.8584, "step": 30900 }, { "epoch": 0.61, "learning_rate": 7.883414567306752e-06, "loss": 0.8097, "step": 30950 }, { "epoch": 0.61, "learning_rate": 7.863840115880753e-06, "loss": 0.868, "step": 31000 }, { "epoch": 0.61, "learning_rate": 7.844265664454755e-06, "loss": 0.8494, "step": 31050 }, { "epoch": 0.61, "learning_rate": 7.824691213028756e-06, "loss": 0.7916, "step": 31100 }, { "epoch": 0.61, "learning_rate": 7.805116761602757e-06, "loss": 0.9308, "step": 31150 }, { "epoch": 0.61, "learning_rate": 7.785542310176759e-06, "loss": 0.7219, "step": 31200 }, { "epoch": 0.61, "learning_rate": 7.76596785875076e-06, "loss": 0.8988, "step": 31250 }, { "epoch": 0.61, "learning_rate": 7.74639340732476e-06, "loss": 0.7423, "step": 31300 }, { "epoch": 0.61, "learning_rate": 7.726818955898761e-06, "loss": 0.7179, "step": 31350 }, { "epoch": 0.61, "learning_rate": 7.707244504472762e-06, "loss": 0.9416, "step": 31400 }, { "epoch": 0.62, "learning_rate": 7.687670053046764e-06, "loss": 0.8769, "step": 31450 }, { "epoch": 0.62, "learning_rate": 7.668095601620765e-06, "loss": 0.992, "step": 31500 }, { "epoch": 0.62, "learning_rate": 7.648521150194766e-06, "loss": 0.8055, "step": 31550 }, { "epoch": 0.62, "learning_rate": 7.628946698768768e-06, "loss": 0.8286, "step": 31600 }, { "epoch": 0.62, "learning_rate": 7.609372247342769e-06, "loss": 0.7925, "step": 31650 }, { "epoch": 0.62, "learning_rate": 7.58979779591677e-06, "loss": 0.812, "step": 31700 }, { "epoch": 0.62, "learning_rate": 7.570223344490772e-06, "loss": 0.8058, "step": 31750 }, { "epoch": 0.62, "learning_rate": 7.550648893064772e-06, "loss": 0.8939, "step": 31800 }, { "epoch": 0.62, "learning_rate": 7.5310744416387735e-06, "loss": 0.9144, "step": 31850 }, { "epoch": 0.62, "learning_rate": 7.511499990212775e-06, "loss": 0.856, "step": 31900 }, { "epoch": 0.63, "learning_rate": 7.491925538786776e-06, "loss": 0.8356, "step": 31950 }, { "epoch": 0.63, "learning_rate": 7.4723510873607775e-06, "loss": 0.8551, "step": 32000 }, { "epoch": 0.63, "learning_rate": 7.452776635934779e-06, "loss": 0.8674, "step": 32050 }, { "epoch": 0.63, "learning_rate": 7.43320218450878e-06, "loss": 0.8289, "step": 32100 }, { "epoch": 0.63, "learning_rate": 7.413627733082781e-06, "loss": 0.9137, "step": 32150 }, { "epoch": 0.63, "learning_rate": 7.394053281656782e-06, "loss": 0.8694, "step": 32200 }, { "epoch": 0.63, "learning_rate": 7.374478830230783e-06, "loss": 0.8175, "step": 32250 }, { "epoch": 0.63, "learning_rate": 7.354904378804785e-06, "loss": 0.8521, "step": 32300 }, { "epoch": 0.63, "learning_rate": 7.335329927378786e-06, "loss": 0.8094, "step": 32350 }, { "epoch": 0.63, "learning_rate": 7.315755475952787e-06, "loss": 0.8094, "step": 32400 }, { "epoch": 0.64, "learning_rate": 7.296181024526789e-06, "loss": 0.8146, "step": 32450 }, { "epoch": 0.64, "learning_rate": 7.276606573100789e-06, "loss": 0.8444, "step": 32500 }, { "epoch": 0.64, "learning_rate": 7.257032121674791e-06, "loss": 0.8016, "step": 32550 }, { "epoch": 0.64, "learning_rate": 7.237457670248792e-06, "loss": 0.8408, "step": 32600 }, { "epoch": 0.64, "learning_rate": 7.217883218822793e-06, "loss": 0.8744, "step": 32650 }, { "epoch": 0.64, "learning_rate": 7.198308767396795e-06, "loss": 0.8412, "step": 32700 }, { "epoch": 0.64, "learning_rate": 7.178734315970796e-06, "loss": 0.8446, "step": 32750 }, { "epoch": 0.64, "learning_rate": 7.1591598645447964e-06, "loss": 0.8469, "step": 32800 }, { "epoch": 0.64, "learning_rate": 7.139585413118798e-06, "loss": 0.7875, "step": 32850 }, { "epoch": 0.64, "learning_rate": 7.120010961692799e-06, "loss": 0.8713, "step": 32900 }, { "epoch": 0.64, "learning_rate": 7.1004365102668005e-06, "loss": 0.8614, "step": 32950 }, { "epoch": 0.65, "learning_rate": 7.080862058840802e-06, "loss": 0.8067, "step": 33000 }, { "epoch": 0.65, "learning_rate": 7.061287607414803e-06, "loss": 0.8323, "step": 33050 }, { "epoch": 0.65, "learning_rate": 7.0417131559888045e-06, "loss": 0.8127, "step": 33100 }, { "epoch": 0.65, "learning_rate": 7.022138704562805e-06, "loss": 0.9393, "step": 33150 }, { "epoch": 0.65, "learning_rate": 7.002564253136806e-06, "loss": 0.8109, "step": 33200 }, { "epoch": 0.65, "learning_rate": 6.982989801710808e-06, "loss": 0.871, "step": 33250 }, { "epoch": 0.65, "learning_rate": 6.963415350284809e-06, "loss": 0.7685, "step": 33300 }, { "epoch": 0.65, "learning_rate": 6.94384089885881e-06, "loss": 0.8988, "step": 33350 }, { "epoch": 0.65, "learning_rate": 6.924266447432812e-06, "loss": 0.8079, "step": 33400 }, { "epoch": 0.65, "learning_rate": 6.904691996006813e-06, "loss": 0.8664, "step": 33450 }, { "epoch": 0.66, "learning_rate": 6.8851175445808135e-06, "loss": 0.8659, "step": 33500 }, { "epoch": 0.66, "learning_rate": 6.865543093154815e-06, "loss": 0.8087, "step": 33550 }, { "epoch": 0.66, "learning_rate": 6.845968641728816e-06, "loss": 0.9701, "step": 33600 }, { "epoch": 0.66, "learning_rate": 6.8263941903028175e-06, "loss": 0.8431, "step": 33650 }, { "epoch": 0.66, "learning_rate": 6.806819738876819e-06, "loss": 0.8664, "step": 33700 }, { "epoch": 0.66, "learning_rate": 6.78724528745082e-06, "loss": 0.8538, "step": 33750 }, { "epoch": 0.66, "learning_rate": 6.767670836024821e-06, "loss": 0.9135, "step": 33800 }, { "epoch": 0.66, "learning_rate": 6.748096384598822e-06, "loss": 0.8, "step": 33850 }, { "epoch": 0.66, "learning_rate": 6.728521933172823e-06, "loss": 0.8769, "step": 33900 }, { "epoch": 0.66, "learning_rate": 6.708947481746825e-06, "loss": 0.8763, "step": 33950 }, { "epoch": 0.67, "learning_rate": 6.689373030320826e-06, "loss": 0.8174, "step": 34000 }, { "epoch": 0.67, "learning_rate": 6.669798578894827e-06, "loss": 0.7703, "step": 34050 }, { "epoch": 0.67, "learning_rate": 6.650224127468829e-06, "loss": 0.9214, "step": 34100 }, { "epoch": 0.67, "learning_rate": 6.630649676042829e-06, "loss": 0.8517, "step": 34150 }, { "epoch": 0.67, "learning_rate": 6.611075224616831e-06, "loss": 0.8129, "step": 34200 }, { "epoch": 0.67, "learning_rate": 6.591500773190832e-06, "loss": 0.8221, "step": 34250 }, { "epoch": 0.67, "learning_rate": 6.571926321764833e-06, "loss": 0.8089, "step": 34300 }, { "epoch": 0.67, "learning_rate": 6.552351870338835e-06, "loss": 0.9105, "step": 34350 }, { "epoch": 0.67, "learning_rate": 6.532777418912836e-06, "loss": 0.7871, "step": 34400 }, { "epoch": 0.67, "learning_rate": 6.513202967486837e-06, "loss": 0.7993, "step": 34450 }, { "epoch": 0.68, "learning_rate": 6.493628516060838e-06, "loss": 0.7592, "step": 34500 }, { "epoch": 0.68, "learning_rate": 6.474054064634839e-06, "loss": 0.8226, "step": 34550 }, { "epoch": 0.68, "learning_rate": 6.4544796132088405e-06, "loss": 0.8362, "step": 34600 }, { "epoch": 0.68, "learning_rate": 6.434905161782842e-06, "loss": 0.8218, "step": 34650 }, { "epoch": 0.68, "learning_rate": 6.415330710356843e-06, "loss": 0.7943, "step": 34700 }, { "epoch": 0.68, "learning_rate": 6.3957562589308445e-06, "loss": 0.9096, "step": 34750 }, { "epoch": 0.68, "learning_rate": 6.376181807504845e-06, "loss": 0.8132, "step": 34800 }, { "epoch": 0.68, "learning_rate": 6.356607356078846e-06, "loss": 0.8449, "step": 34850 }, { "epoch": 0.68, "learning_rate": 6.337032904652848e-06, "loss": 0.8221, "step": 34900 }, { "epoch": 0.68, "learning_rate": 6.317458453226849e-06, "loss": 0.8582, "step": 34950 }, { "epoch": 0.69, "learning_rate": 6.29788400180085e-06, "loss": 0.869, "step": 35000 }, { "epoch": 0.69, "learning_rate": 6.278309550374852e-06, "loss": 0.8262, "step": 35050 }, { "epoch": 0.69, "learning_rate": 6.258735098948853e-06, "loss": 0.8728, "step": 35100 }, { "epoch": 0.69, "learning_rate": 6.2391606475228535e-06, "loss": 0.9178, "step": 35150 }, { "epoch": 0.69, "learning_rate": 6.219586196096855e-06, "loss": 0.7946, "step": 35200 }, { "epoch": 0.69, "learning_rate": 6.200011744670856e-06, "loss": 0.8229, "step": 35250 }, { "epoch": 0.69, "learning_rate": 6.1804372932448575e-06, "loss": 0.8999, "step": 35300 }, { "epoch": 0.69, "learning_rate": 6.160862841818859e-06, "loss": 0.9206, "step": 35350 }, { "epoch": 0.69, "learning_rate": 6.14128839039286e-06, "loss": 0.8875, "step": 35400 }, { "epoch": 0.69, "learning_rate": 6.1217139389668616e-06, "loss": 0.8835, "step": 35450 }, { "epoch": 0.69, "learning_rate": 6.102139487540862e-06, "loss": 0.8897, "step": 35500 }, { "epoch": 0.7, "learning_rate": 6.082565036114863e-06, "loss": 0.9601, "step": 35550 }, { "epoch": 0.7, "learning_rate": 6.062990584688865e-06, "loss": 0.7805, "step": 35600 }, { "epoch": 0.7, "learning_rate": 6.043416133262866e-06, "loss": 0.9185, "step": 35650 }, { "epoch": 0.7, "learning_rate": 6.0238416818368674e-06, "loss": 0.7948, "step": 35700 }, { "epoch": 0.7, "learning_rate": 6.004267230410869e-06, "loss": 0.8508, "step": 35750 }, { "epoch": 0.7, "learning_rate": 5.984692778984869e-06, "loss": 0.8683, "step": 35800 }, { "epoch": 0.7, "learning_rate": 5.965118327558871e-06, "loss": 0.889, "step": 35850 }, { "epoch": 0.7, "learning_rate": 5.945543876132872e-06, "loss": 0.8705, "step": 35900 }, { "epoch": 0.7, "learning_rate": 5.925969424706873e-06, "loss": 0.8509, "step": 35950 }, { "epoch": 0.7, "learning_rate": 5.906394973280875e-06, "loss": 0.8239, "step": 36000 }, { "epoch": 0.71, "learning_rate": 5.886820521854876e-06, "loss": 0.817, "step": 36050 }, { "epoch": 0.71, "learning_rate": 5.867246070428877e-06, "loss": 0.8169, "step": 36100 }, { "epoch": 0.71, "learning_rate": 5.847671619002878e-06, "loss": 0.8845, "step": 36150 }, { "epoch": 0.71, "learning_rate": 5.828097167576879e-06, "loss": 0.7943, "step": 36200 }, { "epoch": 0.71, "learning_rate": 5.8085227161508805e-06, "loss": 0.8947, "step": 36250 }, { "epoch": 0.71, "learning_rate": 5.788948264724882e-06, "loss": 0.9, "step": 36300 }, { "epoch": 0.71, "learning_rate": 5.769373813298883e-06, "loss": 0.8847, "step": 36350 }, { "epoch": 0.71, "learning_rate": 5.7497993618728845e-06, "loss": 0.868, "step": 36400 }, { "epoch": 0.71, "learning_rate": 5.730224910446886e-06, "loss": 0.7603, "step": 36450 }, { "epoch": 0.71, "learning_rate": 5.710650459020886e-06, "loss": 0.8632, "step": 36500 }, { "epoch": 0.72, "learning_rate": 5.691076007594888e-06, "loss": 0.8426, "step": 36550 }, { "epoch": 0.72, "learning_rate": 5.671501556168889e-06, "loss": 0.939, "step": 36600 }, { "epoch": 0.72, "learning_rate": 5.65192710474289e-06, "loss": 0.8623, "step": 36650 }, { "epoch": 0.72, "learning_rate": 5.632352653316892e-06, "loss": 0.8796, "step": 36700 }, { "epoch": 0.72, "learning_rate": 5.612778201890893e-06, "loss": 0.9162, "step": 36750 }, { "epoch": 0.72, "learning_rate": 5.5932037504648935e-06, "loss": 0.7553, "step": 36800 }, { "epoch": 0.72, "learning_rate": 5.573629299038895e-06, "loss": 0.8613, "step": 36850 }, { "epoch": 0.72, "learning_rate": 5.554054847612896e-06, "loss": 0.9001, "step": 36900 }, { "epoch": 0.72, "learning_rate": 5.5344803961868976e-06, "loss": 0.9094, "step": 36950 }, { "epoch": 0.72, "learning_rate": 5.514905944760899e-06, "loss": 0.8644, "step": 37000 }, { "epoch": 0.73, "learning_rate": 5.4953314933349e-06, "loss": 0.8831, "step": 37050 }, { "epoch": 0.73, "learning_rate": 5.4757570419089016e-06, "loss": 0.8597, "step": 37100 }, { "epoch": 0.73, "learning_rate": 5.456182590482902e-06, "loss": 0.7675, "step": 37150 }, { "epoch": 0.73, "learning_rate": 5.436608139056903e-06, "loss": 0.8921, "step": 37200 }, { "epoch": 0.73, "learning_rate": 5.417033687630905e-06, "loss": 0.8159, "step": 37250 }, { "epoch": 0.73, "learning_rate": 5.397459236204906e-06, "loss": 0.8174, "step": 37300 }, { "epoch": 0.73, "learning_rate": 5.3778847847789074e-06, "loss": 0.8421, "step": 37350 }, { "epoch": 0.73, "learning_rate": 5.358310333352909e-06, "loss": 0.8831, "step": 37400 }, { "epoch": 0.73, "learning_rate": 5.33873588192691e-06, "loss": 0.9144, "step": 37450 }, { "epoch": 0.73, "learning_rate": 5.319161430500911e-06, "loss": 0.7922, "step": 37500 }, { "epoch": 0.74, "learning_rate": 5.299586979074912e-06, "loss": 0.8456, "step": 37550 }, { "epoch": 0.74, "learning_rate": 5.280012527648913e-06, "loss": 0.8568, "step": 37600 }, { "epoch": 0.74, "learning_rate": 5.260438076222915e-06, "loss": 0.8747, "step": 37650 }, { "epoch": 0.74, "learning_rate": 5.240863624796916e-06, "loss": 0.8253, "step": 37700 }, { "epoch": 0.74, "learning_rate": 5.221289173370917e-06, "loss": 0.9293, "step": 37750 }, { "epoch": 0.74, "learning_rate": 5.201714721944918e-06, "loss": 0.7685, "step": 37800 }, { "epoch": 0.74, "learning_rate": 5.182140270518919e-06, "loss": 0.8248, "step": 37850 }, { "epoch": 0.74, "learning_rate": 5.1625658190929205e-06, "loss": 0.828, "step": 37900 }, { "epoch": 0.74, "learning_rate": 5.142991367666922e-06, "loss": 0.8755, "step": 37950 }, { "epoch": 0.74, "learning_rate": 5.123416916240923e-06, "loss": 0.7804, "step": 38000 }, { "epoch": 0.74, "learning_rate": 5.1038424648149245e-06, "loss": 0.8399, "step": 38050 }, { "epoch": 0.75, "learning_rate": 5.084268013388926e-06, "loss": 0.936, "step": 38100 }, { "epoch": 0.75, "learning_rate": 5.064693561962926e-06, "loss": 0.8735, "step": 38150 }, { "epoch": 0.75, "learning_rate": 5.045119110536928e-06, "loss": 0.7677, "step": 38200 }, { "epoch": 0.75, "learning_rate": 5.025544659110929e-06, "loss": 0.8416, "step": 38250 }, { "epoch": 0.75, "learning_rate": 5.00597020768493e-06, "loss": 0.7239, "step": 38300 }, { "epoch": 0.75, "learning_rate": 4.986395756258931e-06, "loss": 0.8729, "step": 38350 }, { "epoch": 0.75, "learning_rate": 4.966821304832932e-06, "loss": 0.8779, "step": 38400 }, { "epoch": 0.75, "learning_rate": 4.9472468534069335e-06, "loss": 0.9235, "step": 38450 }, { "epoch": 0.75, "learning_rate": 4.927672401980935e-06, "loss": 0.8315, "step": 38500 }, { "epoch": 0.75, "learning_rate": 4.908097950554936e-06, "loss": 0.7789, "step": 38550 }, { "epoch": 0.76, "learning_rate": 4.888523499128937e-06, "loss": 0.8934, "step": 38600 }, { "epoch": 0.76, "learning_rate": 4.868949047702938e-06, "loss": 0.8634, "step": 38650 }, { "epoch": 0.76, "learning_rate": 4.849374596276939e-06, "loss": 0.7719, "step": 38700 }, { "epoch": 0.76, "learning_rate": 4.829800144850941e-06, "loss": 0.8202, "step": 38750 }, { "epoch": 0.76, "learning_rate": 4.810225693424942e-06, "loss": 0.8192, "step": 38800 }, { "epoch": 0.76, "learning_rate": 4.790651241998943e-06, "loss": 0.8286, "step": 38850 }, { "epoch": 0.76, "learning_rate": 4.771076790572945e-06, "loss": 0.8023, "step": 38900 }, { "epoch": 0.76, "learning_rate": 4.751502339146945e-06, "loss": 0.8284, "step": 38950 }, { "epoch": 0.76, "learning_rate": 4.731927887720947e-06, "loss": 0.7514, "step": 39000 }, { "epoch": 0.76, "learning_rate": 4.712353436294948e-06, "loss": 0.8086, "step": 39050 }, { "epoch": 0.77, "learning_rate": 4.692778984868949e-06, "loss": 0.8498, "step": 39100 }, { "epoch": 0.77, "learning_rate": 4.673204533442951e-06, "loss": 0.8595, "step": 39150 }, { "epoch": 0.77, "learning_rate": 4.653630082016952e-06, "loss": 0.7855, "step": 39200 }, { "epoch": 0.77, "learning_rate": 4.6340556305909524e-06, "loss": 0.85, "step": 39250 }, { "epoch": 0.77, "learning_rate": 4.614481179164954e-06, "loss": 0.9688, "step": 39300 }, { "epoch": 0.77, "learning_rate": 4.594906727738955e-06, "loss": 0.7548, "step": 39350 }, { "epoch": 0.77, "learning_rate": 4.5753322763129565e-06, "loss": 0.8934, "step": 39400 }, { "epoch": 0.77, "learning_rate": 4.555757824886958e-06, "loss": 0.8086, "step": 39450 }, { "epoch": 0.77, "learning_rate": 4.536183373460959e-06, "loss": 0.8638, "step": 39500 }, { "epoch": 0.77, "learning_rate": 4.5166089220349605e-06, "loss": 0.8957, "step": 39550 }, { "epoch": 0.78, "learning_rate": 4.497034470608961e-06, "loss": 0.935, "step": 39600 }, { "epoch": 0.78, "learning_rate": 4.477460019182962e-06, "loss": 0.8048, "step": 39650 }, { "epoch": 0.78, "learning_rate": 4.457885567756964e-06, "loss": 0.8327, "step": 39700 }, { "epoch": 0.78, "learning_rate": 4.438311116330965e-06, "loss": 0.7151, "step": 39750 }, { "epoch": 0.78, "learning_rate": 4.418736664904966e-06, "loss": 0.8875, "step": 39800 }, { "epoch": 0.78, "learning_rate": 4.399162213478968e-06, "loss": 0.8725, "step": 39850 }, { "epoch": 0.78, "learning_rate": 4.379587762052969e-06, "loss": 0.9833, "step": 39900 }, { "epoch": 0.78, "learning_rate": 4.3600133106269695e-06, "loss": 0.8513, "step": 39950 }, { "epoch": 0.78, "learning_rate": 4.340438859200971e-06, "loss": 0.8536, "step": 40000 }, { "epoch": 0.78, "learning_rate": 4.320864407774972e-06, "loss": 0.8258, "step": 40050 }, { "epoch": 0.78, "learning_rate": 4.3012899563489735e-06, "loss": 0.822, "step": 40100 }, { "epoch": 0.79, "learning_rate": 4.281715504922975e-06, "loss": 0.9229, "step": 40150 }, { "epoch": 0.79, "learning_rate": 4.262141053496976e-06, "loss": 0.8064, "step": 40200 }, { "epoch": 0.79, "learning_rate": 4.2425666020709776e-06, "loss": 0.8638, "step": 40250 }, { "epoch": 0.79, "learning_rate": 4.222992150644978e-06, "loss": 0.8485, "step": 40300 }, { "epoch": 0.79, "learning_rate": 4.203417699218979e-06, "loss": 0.874, "step": 40350 }, { "epoch": 0.79, "learning_rate": 4.183843247792981e-06, "loss": 0.8052, "step": 40400 }, { "epoch": 0.79, "learning_rate": 4.164268796366982e-06, "loss": 0.8778, "step": 40450 }, { "epoch": 0.79, "learning_rate": 4.1446943449409834e-06, "loss": 0.8941, "step": 40500 }, { "epoch": 0.79, "learning_rate": 4.125119893514985e-06, "loss": 0.8616, "step": 40550 }, { "epoch": 0.79, "learning_rate": 4.105545442088985e-06, "loss": 0.8288, "step": 40600 }, { "epoch": 0.8, "learning_rate": 4.085970990662987e-06, "loss": 0.8596, "step": 40650 }, { "epoch": 0.8, "learning_rate": 4.066396539236988e-06, "loss": 0.7813, "step": 40700 }, { "epoch": 0.8, "learning_rate": 4.046822087810989e-06, "loss": 0.8639, "step": 40750 }, { "epoch": 0.8, "learning_rate": 4.027247636384991e-06, "loss": 0.7813, "step": 40800 }, { "epoch": 0.8, "learning_rate": 4.007673184958992e-06, "loss": 0.8996, "step": 40850 }, { "epoch": 0.8, "learning_rate": 3.988098733532993e-06, "loss": 0.8716, "step": 40900 }, { "epoch": 0.8, "learning_rate": 3.968524282106994e-06, "loss": 0.7635, "step": 40950 }, { "epoch": 0.8, "learning_rate": 3.948949830680995e-06, "loss": 0.9362, "step": 41000 }, { "epoch": 0.8, "learning_rate": 3.9293753792549965e-06, "loss": 0.8802, "step": 41050 }, { "epoch": 0.8, "learning_rate": 3.909800927828998e-06, "loss": 0.8532, "step": 41100 }, { "epoch": 0.81, "learning_rate": 3.890226476402999e-06, "loss": 0.9293, "step": 41150 }, { "epoch": 0.81, "learning_rate": 3.8706520249770005e-06, "loss": 0.8303, "step": 41200 }, { "epoch": 0.81, "learning_rate": 3.851077573551002e-06, "loss": 0.8075, "step": 41250 }, { "epoch": 0.81, "learning_rate": 3.831503122125002e-06, "loss": 0.8403, "step": 41300 }, { "epoch": 0.81, "learning_rate": 3.8119286706990037e-06, "loss": 0.7709, "step": 41350 }, { "epoch": 0.81, "learning_rate": 3.792354219273005e-06, "loss": 0.8533, "step": 41400 }, { "epoch": 0.81, "learning_rate": 3.7727797678470064e-06, "loss": 0.8405, "step": 41450 }, { "epoch": 0.81, "learning_rate": 3.7532053164210077e-06, "loss": 0.9128, "step": 41500 }, { "epoch": 0.81, "learning_rate": 3.7336308649950086e-06, "loss": 0.8026, "step": 41550 }, { "epoch": 0.81, "learning_rate": 3.71405641356901e-06, "loss": 0.9514, "step": 41600 }, { "epoch": 0.82, "learning_rate": 3.6944819621430113e-06, "loss": 0.7858, "step": 41650 }, { "epoch": 0.82, "learning_rate": 3.6749075107170122e-06, "loss": 0.8624, "step": 41700 }, { "epoch": 0.82, "learning_rate": 3.6553330592910136e-06, "loss": 0.8648, "step": 41750 }, { "epoch": 0.82, "learning_rate": 3.635758607865015e-06, "loss": 0.8852, "step": 41800 }, { "epoch": 0.82, "learning_rate": 3.616184156439016e-06, "loss": 0.8459, "step": 41850 }, { "epoch": 0.82, "learning_rate": 3.596609705013017e-06, "loss": 0.8714, "step": 41900 }, { "epoch": 0.82, "learning_rate": 3.5770352535870185e-06, "loss": 0.939, "step": 41950 }, { "epoch": 0.82, "learning_rate": 3.55746080216102e-06, "loss": 0.8739, "step": 42000 }, { "epoch": 0.82, "learning_rate": 3.5378863507350208e-06, "loss": 0.8866, "step": 42050 }, { "epoch": 0.82, "learning_rate": 3.518311899309022e-06, "loss": 0.8766, "step": 42100 }, { "epoch": 0.83, "learning_rate": 3.4987374478830234e-06, "loss": 0.9094, "step": 42150 }, { "epoch": 0.83, "learning_rate": 3.4791629964570244e-06, "loss": 0.8963, "step": 42200 }, { "epoch": 0.83, "learning_rate": 3.4595885450310257e-06, "loss": 0.7177, "step": 42250 }, { "epoch": 0.83, "learning_rate": 3.440014093605027e-06, "loss": 0.8551, "step": 42300 }, { "epoch": 0.83, "learning_rate": 3.420439642179028e-06, "loss": 0.8674, "step": 42350 }, { "epoch": 0.83, "learning_rate": 3.4008651907530293e-06, "loss": 0.8375, "step": 42400 }, { "epoch": 0.83, "learning_rate": 3.3812907393270306e-06, "loss": 0.8921, "step": 42450 }, { "epoch": 0.83, "learning_rate": 3.361716287901032e-06, "loss": 0.8845, "step": 42500 }, { "epoch": 0.83, "learning_rate": 3.342141836475033e-06, "loss": 0.8361, "step": 42550 }, { "epoch": 0.83, "learning_rate": 3.3225673850490342e-06, "loss": 0.8161, "step": 42600 }, { "epoch": 0.83, "learning_rate": 3.3029929336230356e-06, "loss": 0.8561, "step": 42650 }, { "epoch": 0.84, "learning_rate": 3.2834184821970365e-06, "loss": 0.8798, "step": 42700 }, { "epoch": 0.84, "learning_rate": 3.263844030771038e-06, "loss": 0.8713, "step": 42750 }, { "epoch": 0.84, "learning_rate": 3.244269579345039e-06, "loss": 0.8094, "step": 42800 }, { "epoch": 0.84, "learning_rate": 3.22469512791904e-06, "loss": 0.8569, "step": 42850 }, { "epoch": 0.84, "learning_rate": 3.2051206764930414e-06, "loss": 0.7974, "step": 42900 }, { "epoch": 0.84, "learning_rate": 3.1855462250670428e-06, "loss": 0.7707, "step": 42950 }, { "epoch": 0.84, "learning_rate": 3.165971773641044e-06, "loss": 0.8129, "step": 43000 }, { "epoch": 0.84, "learning_rate": 3.146397322215045e-06, "loss": 0.8381, "step": 43050 }, { "epoch": 0.84, "learning_rate": 3.1268228707890464e-06, "loss": 0.9326, "step": 43100 }, { "epoch": 0.84, "learning_rate": 3.1072484193630477e-06, "loss": 0.9263, "step": 43150 }, { "epoch": 0.85, "learning_rate": 3.0876739679370486e-06, "loss": 0.8431, "step": 43200 }, { "epoch": 0.85, "learning_rate": 3.06809951651105e-06, "loss": 0.7909, "step": 43250 }, { "epoch": 0.85, "learning_rate": 3.0485250650850513e-06, "loss": 0.832, "step": 43300 }, { "epoch": 0.85, "learning_rate": 3.0289506136590522e-06, "loss": 0.9167, "step": 43350 }, { "epoch": 0.85, "learning_rate": 3.0093761622330536e-06, "loss": 0.9032, "step": 43400 }, { "epoch": 0.85, "learning_rate": 2.989801710807055e-06, "loss": 0.8083, "step": 43450 }, { "epoch": 0.85, "learning_rate": 2.9702272593810562e-06, "loss": 0.8541, "step": 43500 }, { "epoch": 0.85, "learning_rate": 2.950652807955057e-06, "loss": 0.787, "step": 43550 }, { "epoch": 0.85, "learning_rate": 2.9310783565290585e-06, "loss": 0.879, "step": 43600 }, { "epoch": 0.85, "learning_rate": 2.91150390510306e-06, "loss": 0.8119, "step": 43650 }, { "epoch": 0.86, "learning_rate": 2.8919294536770608e-06, "loss": 0.86, "step": 43700 }, { "epoch": 0.86, "learning_rate": 2.872355002251062e-06, "loss": 0.8162, "step": 43750 }, { "epoch": 0.86, "learning_rate": 2.8527805508250634e-06, "loss": 0.8034, "step": 43800 }, { "epoch": 0.86, "learning_rate": 2.8332060993990644e-06, "loss": 0.7883, "step": 43850 }, { "epoch": 0.86, "learning_rate": 2.8136316479730657e-06, "loss": 0.8927, "step": 43900 }, { "epoch": 0.86, "learning_rate": 2.794057196547067e-06, "loss": 0.8585, "step": 43950 }, { "epoch": 0.86, "learning_rate": 2.7744827451210684e-06, "loss": 0.8768, "step": 44000 }, { "epoch": 0.86, "learning_rate": 2.7549082936950693e-06, "loss": 0.826, "step": 44050 }, { "epoch": 0.86, "learning_rate": 2.7353338422690706e-06, "loss": 0.8907, "step": 44100 }, { "epoch": 0.86, "learning_rate": 2.715759390843072e-06, "loss": 0.8869, "step": 44150 }, { "epoch": 0.87, "learning_rate": 2.696184939417073e-06, "loss": 0.8397, "step": 44200 }, { "epoch": 0.87, "learning_rate": 2.6766104879910742e-06, "loss": 0.9316, "step": 44250 }, { "epoch": 0.87, "learning_rate": 2.6570360365650756e-06, "loss": 0.8169, "step": 44300 }, { "epoch": 0.87, "learning_rate": 2.6374615851390765e-06, "loss": 0.841, "step": 44350 }, { "epoch": 0.87, "learning_rate": 2.617887133713078e-06, "loss": 0.8966, "step": 44400 }, { "epoch": 0.87, "learning_rate": 2.598312682287079e-06, "loss": 0.8448, "step": 44450 }, { "epoch": 0.87, "learning_rate": 2.5787382308610805e-06, "loss": 0.7451, "step": 44500 }, { "epoch": 0.87, "learning_rate": 2.5591637794350814e-06, "loss": 0.921, "step": 44550 }, { "epoch": 0.87, "learning_rate": 2.5395893280090828e-06, "loss": 0.8679, "step": 44600 }, { "epoch": 0.87, "learning_rate": 2.520014876583084e-06, "loss": 0.94, "step": 44650 }, { "epoch": 0.87, "learning_rate": 2.500440425157085e-06, "loss": 0.7933, "step": 44700 }, { "epoch": 0.88, "learning_rate": 2.4808659737310864e-06, "loss": 0.8309, "step": 44750 }, { "epoch": 0.88, "learning_rate": 2.4612915223050877e-06, "loss": 0.8401, "step": 44800 }, { "epoch": 0.88, "learning_rate": 2.4417170708790886e-06, "loss": 0.8334, "step": 44850 }, { "epoch": 0.88, "learning_rate": 2.42214261945309e-06, "loss": 0.885, "step": 44900 }, { "epoch": 0.88, "learning_rate": 2.4025681680270913e-06, "loss": 0.8044, "step": 44950 }, { "epoch": 0.88, "learning_rate": 2.3829937166010927e-06, "loss": 0.8739, "step": 45000 }, { "epoch": 0.88, "learning_rate": 2.3634192651750936e-06, "loss": 0.8277, "step": 45050 }, { "epoch": 0.88, "learning_rate": 2.343844813749095e-06, "loss": 0.7745, "step": 45100 }, { "epoch": 0.88, "learning_rate": 2.3242703623230963e-06, "loss": 0.8381, "step": 45150 }, { "epoch": 0.88, "learning_rate": 2.304695910897097e-06, "loss": 0.8805, "step": 45200 }, { "epoch": 0.89, "learning_rate": 2.2851214594710985e-06, "loss": 0.8687, "step": 45250 }, { "epoch": 0.89, "learning_rate": 2.2655470080451e-06, "loss": 0.986, "step": 45300 }, { "epoch": 0.89, "learning_rate": 2.2459725566191008e-06, "loss": 0.8102, "step": 45350 }, { "epoch": 0.89, "learning_rate": 2.226398105193102e-06, "loss": 0.8374, "step": 45400 }, { "epoch": 0.89, "learning_rate": 2.2068236537671035e-06, "loss": 0.8183, "step": 45450 }, { "epoch": 0.89, "learning_rate": 2.187249202341105e-06, "loss": 0.7379, "step": 45500 }, { "epoch": 0.89, "learning_rate": 2.1676747509151057e-06, "loss": 0.8932, "step": 45550 }, { "epoch": 0.89, "learning_rate": 2.148100299489107e-06, "loss": 0.8039, "step": 45600 }, { "epoch": 0.89, "learning_rate": 2.1285258480631084e-06, "loss": 0.8697, "step": 45650 }, { "epoch": 0.89, "learning_rate": 2.1089513966371093e-06, "loss": 0.8854, "step": 45700 }, { "epoch": 0.9, "learning_rate": 2.0893769452111106e-06, "loss": 0.7589, "step": 45750 }, { "epoch": 0.9, "learning_rate": 2.069802493785112e-06, "loss": 0.8185, "step": 45800 }, { "epoch": 0.9, "learning_rate": 2.050228042359113e-06, "loss": 0.8476, "step": 45850 }, { "epoch": 0.9, "learning_rate": 2.0306535909331142e-06, "loss": 0.8286, "step": 45900 }, { "epoch": 0.9, "learning_rate": 2.0110791395071156e-06, "loss": 0.8612, "step": 45950 }, { "epoch": 0.9, "learning_rate": 1.991504688081117e-06, "loss": 0.8429, "step": 46000 }, { "epoch": 0.9, "learning_rate": 1.971930236655118e-06, "loss": 0.7646, "step": 46050 }, { "epoch": 0.9, "learning_rate": 1.952355785229119e-06, "loss": 0.8977, "step": 46100 }, { "epoch": 0.9, "learning_rate": 1.9327813338031205e-06, "loss": 0.8532, "step": 46150 }, { "epoch": 0.9, "learning_rate": 1.9132068823771214e-06, "loss": 0.7703, "step": 46200 }, { "epoch": 0.91, "learning_rate": 1.8936324309511228e-06, "loss": 0.8496, "step": 46250 }, { "epoch": 0.91, "learning_rate": 1.8740579795251241e-06, "loss": 0.7815, "step": 46300 }, { "epoch": 0.91, "learning_rate": 1.8544835280991253e-06, "loss": 0.8324, "step": 46350 }, { "epoch": 0.91, "learning_rate": 1.8349090766731264e-06, "loss": 0.793, "step": 46400 }, { "epoch": 0.91, "learning_rate": 1.8153346252471277e-06, "loss": 0.852, "step": 46450 }, { "epoch": 0.91, "learning_rate": 1.7957601738211289e-06, "loss": 0.8608, "step": 46500 }, { "epoch": 0.91, "learning_rate": 1.7761857223951302e-06, "loss": 0.8912, "step": 46550 }, { "epoch": 0.91, "learning_rate": 1.7566112709691313e-06, "loss": 0.821, "step": 46600 }, { "epoch": 0.91, "learning_rate": 1.7370368195431325e-06, "loss": 1.0388, "step": 46650 }, { "epoch": 0.91, "learning_rate": 1.7174623681171338e-06, "loss": 0.91, "step": 46700 }, { "epoch": 0.92, "learning_rate": 1.697887916691135e-06, "loss": 0.8276, "step": 46750 }, { "epoch": 0.92, "learning_rate": 1.6783134652651363e-06, "loss": 0.8285, "step": 46800 }, { "epoch": 0.92, "learning_rate": 1.6587390138391374e-06, "loss": 0.8882, "step": 46850 }, { "epoch": 0.92, "learning_rate": 1.6391645624131385e-06, "loss": 0.8399, "step": 46900 }, { "epoch": 0.92, "learning_rate": 1.6195901109871399e-06, "loss": 0.8796, "step": 46950 }, { "epoch": 0.92, "learning_rate": 1.600015659561141e-06, "loss": 0.8012, "step": 47000 }, { "epoch": 0.92, "learning_rate": 1.5804412081351423e-06, "loss": 0.8199, "step": 47050 }, { "epoch": 0.92, "learning_rate": 1.5608667567091435e-06, "loss": 0.8194, "step": 47100 }, { "epoch": 0.92, "learning_rate": 1.5412923052831446e-06, "loss": 0.8026, "step": 47150 }, { "epoch": 0.92, "learning_rate": 1.521717853857146e-06, "loss": 0.9046, "step": 47200 }, { "epoch": 0.92, "learning_rate": 1.502143402431147e-06, "loss": 0.8155, "step": 47250 }, { "epoch": 0.93, "learning_rate": 1.4825689510051484e-06, "loss": 0.8619, "step": 47300 }, { "epoch": 0.93, "learning_rate": 1.4629944995791495e-06, "loss": 0.8269, "step": 47350 }, { "epoch": 0.93, "learning_rate": 1.4434200481531507e-06, "loss": 0.8113, "step": 47400 }, { "epoch": 0.93, "learning_rate": 1.423845596727152e-06, "loss": 0.8725, "step": 47450 }, { "epoch": 0.93, "learning_rate": 1.4042711453011531e-06, "loss": 0.8423, "step": 47500 }, { "epoch": 0.93, "learning_rate": 1.3846966938751545e-06, "loss": 0.8607, "step": 47550 }, { "epoch": 0.93, "learning_rate": 1.3651222424491556e-06, "loss": 0.7522, "step": 47600 }, { "epoch": 0.93, "learning_rate": 1.3455477910231567e-06, "loss": 0.804, "step": 47650 }, { "epoch": 0.93, "learning_rate": 1.325973339597158e-06, "loss": 0.8359, "step": 47700 }, { "epoch": 0.93, "learning_rate": 1.3063988881711592e-06, "loss": 0.8405, "step": 47750 }, { "epoch": 0.94, "learning_rate": 1.2868244367451605e-06, "loss": 0.8168, "step": 47800 }, { "epoch": 0.94, "learning_rate": 1.2672499853191617e-06, "loss": 0.7954, "step": 47850 }, { "epoch": 0.94, "learning_rate": 1.2476755338931628e-06, "loss": 0.8689, "step": 47900 }, { "epoch": 0.94, "learning_rate": 1.228101082467164e-06, "loss": 0.8532, "step": 47950 }, { "epoch": 0.94, "learning_rate": 1.2085266310411653e-06, "loss": 0.846, "step": 48000 }, { "epoch": 0.94, "learning_rate": 1.1889521796151664e-06, "loss": 0.8195, "step": 48050 }, { "epoch": 0.94, "learning_rate": 1.1693777281891675e-06, "loss": 0.8346, "step": 48100 }, { "epoch": 0.94, "learning_rate": 1.1498032767631689e-06, "loss": 0.8103, "step": 48150 }, { "epoch": 0.94, "learning_rate": 1.13022882533717e-06, "loss": 0.9013, "step": 48200 }, { "epoch": 0.94, "learning_rate": 1.1106543739111713e-06, "loss": 0.8704, "step": 48250 }, { "epoch": 0.95, "learning_rate": 1.0910799224851725e-06, "loss": 0.7644, "step": 48300 }, { "epoch": 0.95, "learning_rate": 1.0715054710591736e-06, "loss": 0.8247, "step": 48350 }, { "epoch": 0.95, "learning_rate": 1.051931019633175e-06, "loss": 0.7922, "step": 48400 }, { "epoch": 0.95, "learning_rate": 1.032356568207176e-06, "loss": 0.8569, "step": 48450 }, { "epoch": 0.95, "learning_rate": 1.0127821167811774e-06, "loss": 0.8725, "step": 48500 }, { "epoch": 0.95, "learning_rate": 9.932076653551785e-07, "loss": 0.8302, "step": 48550 }, { "epoch": 0.95, "learning_rate": 9.736332139291797e-07, "loss": 0.8937, "step": 48600 }, { "epoch": 0.95, "learning_rate": 9.54058762503181e-07, "loss": 0.8193, "step": 48650 }, { "epoch": 0.95, "learning_rate": 9.344843110771821e-07, "loss": 0.8036, "step": 48700 }, { "epoch": 0.95, "learning_rate": 9.149098596511834e-07, "loss": 0.7537, "step": 48750 }, { "epoch": 0.96, "learning_rate": 8.953354082251846e-07, "loss": 0.7915, "step": 48800 }, { "epoch": 0.96, "learning_rate": 8.757609567991858e-07, "loss": 0.8179, "step": 48850 }, { "epoch": 0.96, "learning_rate": 8.56186505373187e-07, "loss": 0.7896, "step": 48900 }, { "epoch": 0.96, "learning_rate": 8.366120539471882e-07, "loss": 0.8691, "step": 48950 }, { "epoch": 0.96, "learning_rate": 8.170376025211894e-07, "loss": 0.8881, "step": 49000 }, { "epoch": 0.96, "learning_rate": 7.974631510951907e-07, "loss": 0.8348, "step": 49050 }, { "epoch": 0.96, "learning_rate": 7.778886996691919e-07, "loss": 0.831, "step": 49100 }, { "epoch": 0.96, "learning_rate": 7.58314248243193e-07, "loss": 0.9022, "step": 49150 }, { "epoch": 0.96, "learning_rate": 7.387397968171943e-07, "loss": 0.8517, "step": 49200 }, { "epoch": 0.96, "learning_rate": 7.191653453911955e-07, "loss": 0.8352, "step": 49250 }, { "epoch": 0.97, "learning_rate": 6.995908939651967e-07, "loss": 0.8007, "step": 49300 }, { "epoch": 0.97, "learning_rate": 6.80016442539198e-07, "loss": 0.8099, "step": 49350 }, { "epoch": 0.97, "learning_rate": 6.604419911131991e-07, "loss": 0.8561, "step": 49400 }, { "epoch": 0.97, "learning_rate": 6.408675396872003e-07, "loss": 0.893, "step": 49450 }, { "epoch": 0.97, "learning_rate": 6.212930882612015e-07, "loss": 0.785, "step": 49500 }, { "epoch": 0.97, "learning_rate": 6.017186368352027e-07, "loss": 0.915, "step": 49550 }, { "epoch": 0.97, "learning_rate": 5.821441854092039e-07, "loss": 0.7017, "step": 49600 }, { "epoch": 0.97, "learning_rate": 5.625697339832052e-07, "loss": 0.8437, "step": 49650 }, { "epoch": 0.97, "learning_rate": 5.429952825572064e-07, "loss": 0.8002, "step": 49700 }, { "epoch": 0.97, "learning_rate": 5.234208311312075e-07, "loss": 0.8034, "step": 49750 }, { "epoch": 0.97, "learning_rate": 5.038463797052088e-07, "loss": 0.7989, "step": 49800 }, { "epoch": 0.98, "learning_rate": 4.8427192827921e-07, "loss": 0.8462, "step": 49850 }, { "epoch": 0.98, "learning_rate": 4.6469747685321123e-07, "loss": 0.7838, "step": 49900 }, { "epoch": 0.98, "learning_rate": 4.451230254272124e-07, "loss": 0.826, "step": 49950 }, { "epoch": 0.98, "learning_rate": 4.2554857400121365e-07, "loss": 0.9047, "step": 50000 }, { "epoch": 0.98, "learning_rate": 4.059741225752149e-07, "loss": 0.8526, "step": 50050 }, { "epoch": 0.98, "learning_rate": 3.8639967114921607e-07, "loss": 0.8042, "step": 50100 }, { "epoch": 0.98, "learning_rate": 3.668252197232173e-07, "loss": 0.9519, "step": 50150 }, { "epoch": 0.98, "learning_rate": 3.472507682972185e-07, "loss": 0.8923, "step": 50200 }, { "epoch": 0.98, "learning_rate": 3.276763168712197e-07, "loss": 0.7782, "step": 50250 }, { "epoch": 0.98, "learning_rate": 3.0810186544522095e-07, "loss": 0.8607, "step": 50300 }, { "epoch": 0.99, "learning_rate": 2.8852741401922214e-07, "loss": 0.8376, "step": 50350 }, { "epoch": 0.99, "learning_rate": 2.6895296259322337e-07, "loss": 0.842, "step": 50400 }, { "epoch": 0.99, "learning_rate": 2.4937851116722455e-07, "loss": 0.878, "step": 50450 }, { "epoch": 0.99, "learning_rate": 2.298040597412258e-07, "loss": 0.8581, "step": 50500 }, { "epoch": 0.99, "learning_rate": 2.10229608315227e-07, "loss": 0.8958, "step": 50550 }, { "epoch": 0.99, "learning_rate": 1.9065515688922818e-07, "loss": 0.92, "step": 50600 }, { "epoch": 0.99, "learning_rate": 1.7108070546322939e-07, "loss": 0.9209, "step": 50650 }, { "epoch": 0.99, "learning_rate": 1.5150625403723062e-07, "loss": 0.9095, "step": 50700 }, { "epoch": 0.99, "learning_rate": 1.3193180261123183e-07, "loss": 0.8469, "step": 50750 }, { "epoch": 0.99, "learning_rate": 1.1235735118523305e-07, "loss": 0.8987, "step": 50800 }, { "epoch": 1.0, "learning_rate": 9.278289975923425e-08, "loss": 0.8769, "step": 50850 }, { "epoch": 1.0, "learning_rate": 7.320844833323547e-08, "loss": 0.8072, "step": 50900 }, { "epoch": 1.0, "learning_rate": 5.3633996907236675e-08, "loss": 0.7988, "step": 50950 }, { "epoch": 1.0, "learning_rate": 3.405954548123789e-08, "loss": 0.7943, "step": 51000 }, { "epoch": 1.0, "learning_rate": 1.4485094055239104e-08, "loss": 0.7839, "step": 51050 }, { "epoch": 1.0, "eval_Macro F1": 0.45208930263938163, "eval_Macro Precision": 0.5507686024916876, "eval_Macro Recall": 0.4294052669214516, "eval_Micro F1": 0.7266960098561323, "eval_Micro Precision": 0.7266960098561323, "eval_Micro Recall": 0.7266960098561323, "eval_Weighted F1": 0.6482219016896026, "eval_Weighted Precision": 0.6261988330407515, "eval_Weighted Recall": 0.7266960098561323, "eval_accuracy": 0.7266960098561323, "eval_loss": 0.8367530107498169, "eval_runtime": 1309.1607, "eval_samples_per_second": 153.76, "eval_steps_per_second": 19.22, "step": 51087 }, { "epoch": 1.0, "step": 51087, "total_flos": 1927062258615528.0, "train_loss": 0.8681267702094373, "train_runtime": 9176.1966, "train_samples_per_second": 44.538, "train_steps_per_second": 5.567 } ], "max_steps": 51087, "num_train_epochs": 1, "total_flos": 1927062258615528.0, "trial_name": null, "trial_params": null }