diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6175 @@ +{ + "best_metric": 0.8367530107498169, + "best_model_checkpoint": "albert-base-v2-Malicious_URLs/checkpoint-51087", + "epoch": 1.0, + "global_step": 51087, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.9999608510971483e-05, + "loss": 1.1706, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.9980425548574e-05, + "loss": 1.1046, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.9960851097148004e-05, + "loss": 0.9971, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 1.9941276645722004e-05, + "loss": 1.0489, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 1.9921702194296007e-05, + "loss": 0.9931, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 1.9902127742870007e-05, + "loss": 1.0318, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 1.988255329144401e-05, + "loss": 1.015, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 1.986297884001801e-05, + "loss": 0.8838, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 1.9843404388592012e-05, + "loss": 0.9699, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 1.9823829937166012e-05, + "loss": 0.9734, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 1.9804255485740015e-05, + "loss": 0.9944, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 1.9784681034314015e-05, + "loss": 0.9173, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 1.9765106582888018e-05, + "loss": 0.8308, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 1.9745532131462017e-05, + "loss": 0.8602, + "step": 650 + }, + { + "epoch": 0.01, + "learning_rate": 1.9725957680036017e-05, + "loss": 0.9536, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 1.970638322861002e-05, + "loss": 0.9808, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 1.968680877718402e-05, + "loss": 0.884, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 1.9667234325758023e-05, + "loss": 0.9394, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 1.9647659874332022e-05, + "loss": 0.9618, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 1.9628085422906025e-05, + "loss": 0.9904, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 1.9608510971480025e-05, + "loss": 0.9654, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 1.9588936520054028e-05, + "loss": 0.8642, + "step": 1050 + }, + { + "epoch": 0.02, + "learning_rate": 1.9569362068628028e-05, + "loss": 0.949, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 1.954978761720203e-05, + "loss": 0.8394, + "step": 1150 + }, + { + "epoch": 0.02, + "learning_rate": 1.953021316577603e-05, + "loss": 0.8666, + "step": 1200 + }, + { + "epoch": 0.02, + "learning_rate": 1.9510638714350033e-05, + "loss": 0.9656, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 1.9491064262924033e-05, + "loss": 0.9162, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 1.9471489811498033e-05, + "loss": 0.9125, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 1.9451915360072036e-05, + "loss": 0.872, + "step": 1400 + }, + { + "epoch": 0.03, + "learning_rate": 1.9432340908646035e-05, + "loss": 0.9858, + "step": 1450 + }, + { + "epoch": 0.03, + "learning_rate": 1.941276645722004e-05, + "loss": 0.9059, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 1.9393192005794038e-05, + "loss": 0.9647, + "step": 1550 + }, + { + "epoch": 0.03, + "learning_rate": 1.937361755436804e-05, + "loss": 0.9802, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 1.935404310294204e-05, + "loss": 0.9604, + "step": 1650 + }, + { + "epoch": 0.03, + "learning_rate": 1.9334468651516044e-05, + "loss": 0.8774, + "step": 1700 + }, + { + "epoch": 0.03, + "learning_rate": 1.9314894200090043e-05, + "loss": 0.8881, + "step": 1750 + }, + { + "epoch": 0.04, + "learning_rate": 1.9295319748664047e-05, + "loss": 0.8775, + "step": 1800 + }, + { + "epoch": 0.04, + "learning_rate": 1.9275745297238046e-05, + "loss": 0.9427, + "step": 1850 + }, + { + "epoch": 0.04, + "learning_rate": 1.925617084581205e-05, + "loss": 0.8997, + "step": 1900 + }, + { + "epoch": 0.04, + "learning_rate": 1.923659639438605e-05, + "loss": 0.9101, + "step": 1950 + }, + { + "epoch": 0.04, + "learning_rate": 1.9217021942960052e-05, + "loss": 0.9247, + "step": 2000 + }, + { + "epoch": 0.04, + "learning_rate": 1.919744749153405e-05, + "loss": 0.8978, + "step": 2050 + }, + { + "epoch": 0.04, + "learning_rate": 1.917787304010805e-05, + "loss": 0.9436, + "step": 2100 + }, + { + "epoch": 0.04, + "learning_rate": 1.9158298588682054e-05, + "loss": 0.91, + "step": 2150 + }, + { + "epoch": 0.04, + "learning_rate": 1.9138724137256054e-05, + "loss": 0.965, + "step": 2200 + }, + { + "epoch": 0.04, + "learning_rate": 1.9119149685830057e-05, + "loss": 0.9488, + "step": 2250 + }, + { + "epoch": 0.05, + "learning_rate": 1.9099575234404057e-05, + "loss": 0.8534, + "step": 2300 + }, + { + "epoch": 0.05, + "learning_rate": 1.908000078297806e-05, + "loss": 0.9636, + "step": 2350 + }, + { + "epoch": 0.05, + "learning_rate": 1.906042633155206e-05, + "loss": 0.9581, + "step": 2400 + }, + { + "epoch": 0.05, + "learning_rate": 1.9040851880126062e-05, + "loss": 0.83, + "step": 2450 + }, + { + "epoch": 0.05, + "learning_rate": 1.9021277428700062e-05, + "loss": 0.9326, + "step": 2500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9001702977274065e-05, + "loss": 0.9152, + "step": 2550 + }, + { + "epoch": 0.05, + "learning_rate": 1.8982128525848065e-05, + "loss": 0.9363, + "step": 2600 + }, + { + "epoch": 0.05, + "learning_rate": 1.8962554074422068e-05, + "loss": 0.8746, + "step": 2650 + }, + { + "epoch": 0.05, + "learning_rate": 1.8942979622996067e-05, + "loss": 0.8996, + "step": 2700 + }, + { + "epoch": 0.05, + "learning_rate": 1.8923405171570067e-05, + "loss": 0.9116, + "step": 2750 + }, + { + "epoch": 0.05, + "learning_rate": 1.890383072014407e-05, + "loss": 0.8015, + "step": 2800 + }, + { + "epoch": 0.06, + "learning_rate": 1.888425626871807e-05, + "loss": 0.926, + "step": 2850 + }, + { + "epoch": 0.06, + "learning_rate": 1.8864681817292073e-05, + "loss": 0.8506, + "step": 2900 + }, + { + "epoch": 0.06, + "learning_rate": 1.8845107365866072e-05, + "loss": 0.9913, + "step": 2950 + }, + { + "epoch": 0.06, + "learning_rate": 1.8825532914440075e-05, + "loss": 0.8355, + "step": 3000 + }, + { + "epoch": 0.06, + "learning_rate": 1.8805958463014075e-05, + "loss": 0.9649, + "step": 3050 + }, + { + "epoch": 0.06, + "learning_rate": 1.8786384011588078e-05, + "loss": 0.8837, + "step": 3100 + }, + { + "epoch": 0.06, + "learning_rate": 1.8766809560162078e-05, + "loss": 0.9546, + "step": 3150 + }, + { + "epoch": 0.06, + "learning_rate": 1.874723510873608e-05, + "loss": 0.8529, + "step": 3200 + }, + { + "epoch": 0.06, + "learning_rate": 1.872766065731008e-05, + "loss": 0.9503, + "step": 3250 + }, + { + "epoch": 0.06, + "learning_rate": 1.8708086205884083e-05, + "loss": 1.0059, + "step": 3300 + }, + { + "epoch": 0.07, + "learning_rate": 1.8688511754458083e-05, + "loss": 0.9493, + "step": 3350 + }, + { + "epoch": 0.07, + "learning_rate": 1.8668937303032083e-05, + "loss": 0.8838, + "step": 3400 + }, + { + "epoch": 0.07, + "learning_rate": 1.8649362851606086e-05, + "loss": 0.8728, + "step": 3450 + }, + { + "epoch": 0.07, + "learning_rate": 1.8629788400180085e-05, + "loss": 0.8237, + "step": 3500 + }, + { + "epoch": 0.07, + "learning_rate": 1.861021394875409e-05, + "loss": 0.9691, + "step": 3550 + }, + { + "epoch": 0.07, + "learning_rate": 1.8590639497328088e-05, + "loss": 0.8934, + "step": 3600 + }, + { + "epoch": 0.07, + "learning_rate": 1.857106504590209e-05, + "loss": 0.8628, + "step": 3650 + }, + { + "epoch": 0.07, + "learning_rate": 1.855149059447609e-05, + "loss": 0.9047, + "step": 3700 + }, + { + "epoch": 0.07, + "learning_rate": 1.8531916143050094e-05, + "loss": 0.9896, + "step": 3750 + }, + { + "epoch": 0.07, + "learning_rate": 1.8512341691624093e-05, + "loss": 0.8885, + "step": 3800 + }, + { + "epoch": 0.08, + "learning_rate": 1.8492767240198096e-05, + "loss": 0.9171, + "step": 3850 + }, + { + "epoch": 0.08, + "learning_rate": 1.8473192788772096e-05, + "loss": 0.9144, + "step": 3900 + }, + { + "epoch": 0.08, + "learning_rate": 1.84536183373461e-05, + "loss": 0.949, + "step": 3950 + }, + { + "epoch": 0.08, + "learning_rate": 1.84340438859201e-05, + "loss": 0.9595, + "step": 4000 + }, + { + "epoch": 0.08, + "learning_rate": 1.84144694344941e-05, + "loss": 0.9018, + "step": 4050 + }, + { + "epoch": 0.08, + "learning_rate": 1.83948949830681e-05, + "loss": 0.9434, + "step": 4100 + }, + { + "epoch": 0.08, + "learning_rate": 1.83753205316421e-05, + "loss": 1.0054, + "step": 4150 + }, + { + "epoch": 0.08, + "learning_rate": 1.8355746080216104e-05, + "loss": 0.8584, + "step": 4200 + }, + { + "epoch": 0.08, + "learning_rate": 1.8336171628790104e-05, + "loss": 0.8759, + "step": 4250 + }, + { + "epoch": 0.08, + "learning_rate": 1.8316597177364107e-05, + "loss": 0.7978, + "step": 4300 + }, + { + "epoch": 0.09, + "learning_rate": 1.8297022725938106e-05, + "loss": 0.8494, + "step": 4350 + }, + { + "epoch": 0.09, + "learning_rate": 1.827744827451211e-05, + "loss": 0.8714, + "step": 4400 + }, + { + "epoch": 0.09, + "learning_rate": 1.825787382308611e-05, + "loss": 0.9271, + "step": 4450 + }, + { + "epoch": 0.09, + "learning_rate": 1.8238299371660112e-05, + "loss": 0.9466, + "step": 4500 + }, + { + "epoch": 0.09, + "learning_rate": 1.8218724920234112e-05, + "loss": 0.8289, + "step": 4550 + }, + { + "epoch": 0.09, + "learning_rate": 1.8199150468808115e-05, + "loss": 0.9159, + "step": 4600 + }, + { + "epoch": 0.09, + "learning_rate": 1.8179576017382114e-05, + "loss": 0.8418, + "step": 4650 + }, + { + "epoch": 0.09, + "learning_rate": 1.8160001565956114e-05, + "loss": 0.9818, + "step": 4700 + }, + { + "epoch": 0.09, + "learning_rate": 1.8140427114530117e-05, + "loss": 0.8762, + "step": 4750 + }, + { + "epoch": 0.09, + "learning_rate": 1.8120852663104117e-05, + "loss": 0.9628, + "step": 4800 + }, + { + "epoch": 0.09, + "learning_rate": 1.810127821167812e-05, + "loss": 0.7778, + "step": 4850 + }, + { + "epoch": 0.1, + "learning_rate": 1.808170376025212e-05, + "loss": 0.8857, + "step": 4900 + }, + { + "epoch": 0.1, + "learning_rate": 1.8062129308826123e-05, + "loss": 0.9288, + "step": 4950 + }, + { + "epoch": 0.1, + "learning_rate": 1.8042554857400122e-05, + "loss": 0.9101, + "step": 5000 + }, + { + "epoch": 0.1, + "learning_rate": 1.8022980405974125e-05, + "loss": 0.9358, + "step": 5050 + }, + { + "epoch": 0.1, + "learning_rate": 1.8003405954548125e-05, + "loss": 0.8956, + "step": 5100 + }, + { + "epoch": 0.1, + "learning_rate": 1.7983831503122128e-05, + "loss": 0.8506, + "step": 5150 + }, + { + "epoch": 0.1, + "learning_rate": 1.7964257051696128e-05, + "loss": 0.8635, + "step": 5200 + }, + { + "epoch": 0.1, + "learning_rate": 1.794468260027013e-05, + "loss": 0.9267, + "step": 5250 + }, + { + "epoch": 0.1, + "learning_rate": 1.792510814884413e-05, + "loss": 0.8688, + "step": 5300 + }, + { + "epoch": 0.1, + "learning_rate": 1.790553369741813e-05, + "loss": 0.8538, + "step": 5350 + }, + { + "epoch": 0.11, + "learning_rate": 1.7885959245992133e-05, + "loss": 0.8628, + "step": 5400 + }, + { + "epoch": 0.11, + "learning_rate": 1.7866384794566133e-05, + "loss": 0.9528, + "step": 5450 + }, + { + "epoch": 0.11, + "learning_rate": 1.7846810343140136e-05, + "loss": 0.9198, + "step": 5500 + }, + { + "epoch": 0.11, + "learning_rate": 1.7827235891714135e-05, + "loss": 0.8878, + "step": 5550 + }, + { + "epoch": 0.11, + "learning_rate": 1.7807661440288138e-05, + "loss": 0.9425, + "step": 5600 + }, + { + "epoch": 0.11, + "learning_rate": 1.7788086988862138e-05, + "loss": 0.8977, + "step": 5650 + }, + { + "epoch": 0.11, + "learning_rate": 1.776851253743614e-05, + "loss": 0.8967, + "step": 5700 + }, + { + "epoch": 0.11, + "learning_rate": 1.774893808601014e-05, + "loss": 0.9365, + "step": 5750 + }, + { + "epoch": 0.11, + "learning_rate": 1.7729363634584144e-05, + "loss": 0.8376, + "step": 5800 + }, + { + "epoch": 0.11, + "learning_rate": 1.7709789183158143e-05, + "loss": 0.8587, + "step": 5850 + }, + { + "epoch": 0.12, + "learning_rate": 1.7690214731732146e-05, + "loss": 0.8808, + "step": 5900 + }, + { + "epoch": 0.12, + "learning_rate": 1.7670640280306146e-05, + "loss": 0.8871, + "step": 5950 + }, + { + "epoch": 0.12, + "learning_rate": 1.765106582888015e-05, + "loss": 0.8399, + "step": 6000 + }, + { + "epoch": 0.12, + "learning_rate": 1.763149137745415e-05, + "loss": 0.9007, + "step": 6050 + }, + { + "epoch": 0.12, + "learning_rate": 1.7611916926028148e-05, + "loss": 0.9102, + "step": 6100 + }, + { + "epoch": 0.12, + "learning_rate": 1.759234247460215e-05, + "loss": 0.8689, + "step": 6150 + }, + { + "epoch": 0.12, + "learning_rate": 1.757276802317615e-05, + "loss": 0.9715, + "step": 6200 + }, + { + "epoch": 0.12, + "learning_rate": 1.7553193571750154e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 0.12, + "learning_rate": 1.7533619120324154e-05, + "loss": 0.8597, + "step": 6300 + }, + { + "epoch": 0.12, + "learning_rate": 1.7514044668898157e-05, + "loss": 0.9172, + "step": 6350 + }, + { + "epoch": 0.13, + "learning_rate": 1.7494470217472156e-05, + "loss": 0.9287, + "step": 6400 + }, + { + "epoch": 0.13, + "learning_rate": 1.747489576604616e-05, + "loss": 0.8687, + "step": 6450 + }, + { + "epoch": 0.13, + "learning_rate": 1.745532131462016e-05, + "loss": 0.8804, + "step": 6500 + }, + { + "epoch": 0.13, + "learning_rate": 1.7435746863194162e-05, + "loss": 0.9483, + "step": 6550 + }, + { + "epoch": 0.13, + "learning_rate": 1.741617241176816e-05, + "loss": 0.8749, + "step": 6600 + }, + { + "epoch": 0.13, + "learning_rate": 1.7396597960342165e-05, + "loss": 0.9452, + "step": 6650 + }, + { + "epoch": 0.13, + "learning_rate": 1.7377023508916164e-05, + "loss": 0.9059, + "step": 6700 + }, + { + "epoch": 0.13, + "learning_rate": 1.7357449057490164e-05, + "loss": 0.8311, + "step": 6750 + }, + { + "epoch": 0.13, + "learning_rate": 1.7337874606064167e-05, + "loss": 0.8671, + "step": 6800 + }, + { + "epoch": 0.13, + "learning_rate": 1.7318300154638167e-05, + "loss": 0.9169, + "step": 6850 + }, + { + "epoch": 0.14, + "learning_rate": 1.729872570321217e-05, + "loss": 0.9448, + "step": 6900 + }, + { + "epoch": 0.14, + "learning_rate": 1.727915125178617e-05, + "loss": 0.8703, + "step": 6950 + }, + { + "epoch": 0.14, + "learning_rate": 1.7259576800360172e-05, + "loss": 0.9438, + "step": 7000 + }, + { + "epoch": 0.14, + "learning_rate": 1.7240002348934172e-05, + "loss": 0.868, + "step": 7050 + }, + { + "epoch": 0.14, + "learning_rate": 1.7220427897508175e-05, + "loss": 0.9868, + "step": 7100 + }, + { + "epoch": 0.14, + "learning_rate": 1.7200853446082175e-05, + "loss": 0.7904, + "step": 7150 + }, + { + "epoch": 0.14, + "learning_rate": 1.7181278994656178e-05, + "loss": 0.8628, + "step": 7200 + }, + { + "epoch": 0.14, + "learning_rate": 1.7161704543230177e-05, + "loss": 0.8898, + "step": 7250 + }, + { + "epoch": 0.14, + "learning_rate": 1.714213009180418e-05, + "loss": 0.9962, + "step": 7300 + }, + { + "epoch": 0.14, + "learning_rate": 1.712255564037818e-05, + "loss": 0.939, + "step": 7350 + }, + { + "epoch": 0.14, + "learning_rate": 1.710298118895218e-05, + "loss": 0.7759, + "step": 7400 + }, + { + "epoch": 0.15, + "learning_rate": 1.7083406737526183e-05, + "loss": 0.8616, + "step": 7450 + }, + { + "epoch": 0.15, + "learning_rate": 1.7063832286100182e-05, + "loss": 0.9495, + "step": 7500 + }, + { + "epoch": 0.15, + "learning_rate": 1.7044257834674185e-05, + "loss": 0.8752, + "step": 7550 + }, + { + "epoch": 0.15, + "learning_rate": 1.7024683383248185e-05, + "loss": 0.8908, + "step": 7600 + }, + { + "epoch": 0.15, + "learning_rate": 1.7005108931822188e-05, + "loss": 0.8843, + "step": 7650 + }, + { + "epoch": 0.15, + "learning_rate": 1.6985534480396188e-05, + "loss": 0.9643, + "step": 7700 + }, + { + "epoch": 0.15, + "learning_rate": 1.696596002897019e-05, + "loss": 0.8528, + "step": 7750 + }, + { + "epoch": 0.15, + "learning_rate": 1.694638557754419e-05, + "loss": 0.8625, + "step": 7800 + }, + { + "epoch": 0.15, + "learning_rate": 1.6926811126118193e-05, + "loss": 0.9945, + "step": 7850 + }, + { + "epoch": 0.15, + "learning_rate": 1.6907236674692193e-05, + "loss": 0.8112, + "step": 7900 + }, + { + "epoch": 0.16, + "learning_rate": 1.6887662223266196e-05, + "loss": 0.9049, + "step": 7950 + }, + { + "epoch": 0.16, + "learning_rate": 1.6868087771840196e-05, + "loss": 0.8941, + "step": 8000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6848513320414195e-05, + "loss": 0.8887, + "step": 8050 + }, + { + "epoch": 0.16, + "learning_rate": 1.68289388689882e-05, + "loss": 0.941, + "step": 8100 + }, + { + "epoch": 0.16, + "learning_rate": 1.6809364417562198e-05, + "loss": 0.8806, + "step": 8150 + }, + { + "epoch": 0.16, + "learning_rate": 1.67897899661362e-05, + "loss": 0.9605, + "step": 8200 + }, + { + "epoch": 0.16, + "learning_rate": 1.67702155147102e-05, + "loss": 0.8432, + "step": 8250 + }, + { + "epoch": 0.16, + "learning_rate": 1.6750641063284204e-05, + "loss": 0.8381, + "step": 8300 + }, + { + "epoch": 0.16, + "learning_rate": 1.6731066611858204e-05, + "loss": 0.8558, + "step": 8350 + }, + { + "epoch": 0.16, + "learning_rate": 1.6711492160432207e-05, + "loss": 0.9525, + "step": 8400 + }, + { + "epoch": 0.17, + "learning_rate": 1.6691917709006206e-05, + "loss": 0.8599, + "step": 8450 + }, + { + "epoch": 0.17, + "learning_rate": 1.667234325758021e-05, + "loss": 0.9754, + "step": 8500 + }, + { + "epoch": 0.17, + "learning_rate": 1.665276880615421e-05, + "loss": 0.9006, + "step": 8550 + }, + { + "epoch": 0.17, + "learning_rate": 1.6633194354728212e-05, + "loss": 0.8845, + "step": 8600 + }, + { + "epoch": 0.17, + "learning_rate": 1.661361990330221e-05, + "loss": 1.0112, + "step": 8650 + }, + { + "epoch": 0.17, + "learning_rate": 1.659404545187621e-05, + "loss": 1.0344, + "step": 8700 + }, + { + "epoch": 0.17, + "learning_rate": 1.6574471000450214e-05, + "loss": 0.9505, + "step": 8750 + }, + { + "epoch": 0.17, + "learning_rate": 1.6554896549024214e-05, + "loss": 0.8472, + "step": 8800 + }, + { + "epoch": 0.17, + "learning_rate": 1.6535322097598217e-05, + "loss": 0.923, + "step": 8850 + }, + { + "epoch": 0.17, + "learning_rate": 1.6515747646172217e-05, + "loss": 0.9265, + "step": 8900 + }, + { + "epoch": 0.18, + "learning_rate": 1.649617319474622e-05, + "loss": 0.9428, + "step": 8950 + }, + { + "epoch": 0.18, + "learning_rate": 1.647659874332022e-05, + "loss": 0.961, + "step": 9000 + }, + { + "epoch": 0.18, + "learning_rate": 1.6457024291894222e-05, + "loss": 0.8903, + "step": 9050 + }, + { + "epoch": 0.18, + "learning_rate": 1.6437449840468222e-05, + "loss": 0.9757, + "step": 9100 + }, + { + "epoch": 0.18, + "learning_rate": 1.6417875389042225e-05, + "loss": 0.9208, + "step": 9150 + }, + { + "epoch": 0.18, + "learning_rate": 1.6398300937616225e-05, + "loss": 0.9723, + "step": 9200 + }, + { + "epoch": 0.18, + "learning_rate": 1.6378726486190228e-05, + "loss": 0.845, + "step": 9250 + }, + { + "epoch": 0.18, + "learning_rate": 1.6359152034764227e-05, + "loss": 0.9029, + "step": 9300 + }, + { + "epoch": 0.18, + "learning_rate": 1.633957758333823e-05, + "loss": 0.8009, + "step": 9350 + }, + { + "epoch": 0.18, + "learning_rate": 1.632000313191223e-05, + "loss": 0.9144, + "step": 9400 + }, + { + "epoch": 0.18, + "learning_rate": 1.630042868048623e-05, + "loss": 0.9191, + "step": 9450 + }, + { + "epoch": 0.19, + "learning_rate": 1.6280854229060233e-05, + "loss": 0.9451, + "step": 9500 + }, + { + "epoch": 0.19, + "learning_rate": 1.6261279777634232e-05, + "loss": 0.9365, + "step": 9550 + }, + { + "epoch": 0.19, + "learning_rate": 1.6241705326208235e-05, + "loss": 0.8914, + "step": 9600 + }, + { + "epoch": 0.19, + "learning_rate": 1.6222130874782235e-05, + "loss": 0.849, + "step": 9650 + }, + { + "epoch": 0.19, + "learning_rate": 1.6202556423356238e-05, + "loss": 0.8415, + "step": 9700 + }, + { + "epoch": 0.19, + "learning_rate": 1.6182981971930238e-05, + "loss": 0.9671, + "step": 9750 + }, + { + "epoch": 0.19, + "learning_rate": 1.616340752050424e-05, + "loss": 0.8248, + "step": 9800 + }, + { + "epoch": 0.19, + "learning_rate": 1.614383306907824e-05, + "loss": 0.8498, + "step": 9850 + }, + { + "epoch": 0.19, + "learning_rate": 1.6124258617652243e-05, + "loss": 1.0332, + "step": 9900 + }, + { + "epoch": 0.19, + "learning_rate": 1.6104684166226243e-05, + "loss": 0.9052, + "step": 9950 + }, + { + "epoch": 0.2, + "learning_rate": 1.6085109714800246e-05, + "loss": 0.9069, + "step": 10000 + }, + { + "epoch": 0.2, + "learning_rate": 1.6065535263374246e-05, + "loss": 0.8651, + "step": 10050 + }, + { + "epoch": 0.2, + "learning_rate": 1.6045960811948245e-05, + "loss": 0.9262, + "step": 10100 + }, + { + "epoch": 0.2, + "learning_rate": 1.602638636052225e-05, + "loss": 0.9556, + "step": 10150 + }, + { + "epoch": 0.2, + "learning_rate": 1.6006811909096248e-05, + "loss": 0.8846, + "step": 10200 + }, + { + "epoch": 0.2, + "learning_rate": 1.598723745767025e-05, + "loss": 0.8435, + "step": 10250 + }, + { + "epoch": 0.2, + "learning_rate": 1.596766300624425e-05, + "loss": 0.9585, + "step": 10300 + }, + { + "epoch": 0.2, + "learning_rate": 1.5948088554818254e-05, + "loss": 0.8239, + "step": 10350 + }, + { + "epoch": 0.2, + "learning_rate": 1.5928514103392253e-05, + "loss": 0.811, + "step": 10400 + }, + { + "epoch": 0.2, + "learning_rate": 1.5908939651966256e-05, + "loss": 0.8884, + "step": 10450 + }, + { + "epoch": 0.21, + "learning_rate": 1.5889365200540256e-05, + "loss": 0.8534, + "step": 10500 + }, + { + "epoch": 0.21, + "learning_rate": 1.586979074911426e-05, + "loss": 0.9667, + "step": 10550 + }, + { + "epoch": 0.21, + "learning_rate": 1.585021629768826e-05, + "loss": 0.9295, + "step": 10600 + }, + { + "epoch": 0.21, + "learning_rate": 1.5830641846262262e-05, + "loss": 0.9055, + "step": 10650 + }, + { + "epoch": 0.21, + "learning_rate": 1.581106739483626e-05, + "loss": 0.8377, + "step": 10700 + }, + { + "epoch": 0.21, + "learning_rate": 1.579149294341026e-05, + "loss": 0.9062, + "step": 10750 + }, + { + "epoch": 0.21, + "learning_rate": 1.5771918491984264e-05, + "loss": 0.8655, + "step": 10800 + }, + { + "epoch": 0.21, + "learning_rate": 1.5752344040558264e-05, + "loss": 0.8602, + "step": 10850 + }, + { + "epoch": 0.21, + "learning_rate": 1.5732769589132267e-05, + "loss": 0.8707, + "step": 10900 + }, + { + "epoch": 0.21, + "learning_rate": 1.5713195137706266e-05, + "loss": 0.8257, + "step": 10950 + }, + { + "epoch": 0.22, + "learning_rate": 1.569362068628027e-05, + "loss": 0.8605, + "step": 11000 + }, + { + "epoch": 0.22, + "learning_rate": 1.567404623485427e-05, + "loss": 0.7885, + "step": 11050 + }, + { + "epoch": 0.22, + "learning_rate": 1.5654471783428272e-05, + "loss": 0.8821, + "step": 11100 + }, + { + "epoch": 0.22, + "learning_rate": 1.5634897332002272e-05, + "loss": 0.8238, + "step": 11150 + }, + { + "epoch": 0.22, + "learning_rate": 1.5615322880576275e-05, + "loss": 0.9978, + "step": 11200 + }, + { + "epoch": 0.22, + "learning_rate": 1.5595748429150274e-05, + "loss": 0.8357, + "step": 11250 + }, + { + "epoch": 0.22, + "learning_rate": 1.5576173977724278e-05, + "loss": 0.9715, + "step": 11300 + }, + { + "epoch": 0.22, + "learning_rate": 1.5556599526298277e-05, + "loss": 0.8519, + "step": 11350 + }, + { + "epoch": 0.22, + "learning_rate": 1.5537025074872277e-05, + "loss": 0.7997, + "step": 11400 + }, + { + "epoch": 0.22, + "learning_rate": 1.551745062344628e-05, + "loss": 0.7839, + "step": 11450 + }, + { + "epoch": 0.23, + "learning_rate": 1.549787617202028e-05, + "loss": 0.7902, + "step": 11500 + }, + { + "epoch": 0.23, + "learning_rate": 1.5478301720594283e-05, + "loss": 0.9108, + "step": 11550 + }, + { + "epoch": 0.23, + "learning_rate": 1.5458727269168282e-05, + "loss": 0.9531, + "step": 11600 + }, + { + "epoch": 0.23, + "learning_rate": 1.5439152817742285e-05, + "loss": 0.9653, + "step": 11650 + }, + { + "epoch": 0.23, + "learning_rate": 1.5419578366316285e-05, + "loss": 0.8278, + "step": 11700 + }, + { + "epoch": 0.23, + "learning_rate": 1.5400003914890288e-05, + "loss": 0.9261, + "step": 11750 + }, + { + "epoch": 0.23, + "learning_rate": 1.5380429463464288e-05, + "loss": 0.8742, + "step": 11800 + }, + { + "epoch": 0.23, + "learning_rate": 1.536085501203829e-05, + "loss": 0.8484, + "step": 11850 + }, + { + "epoch": 0.23, + "learning_rate": 1.534128056061229e-05, + "loss": 0.9005, + "step": 11900 + }, + { + "epoch": 0.23, + "learning_rate": 1.5321706109186293e-05, + "loss": 0.9226, + "step": 11950 + }, + { + "epoch": 0.23, + "learning_rate": 1.5302131657760293e-05, + "loss": 0.8965, + "step": 12000 + }, + { + "epoch": 0.24, + "learning_rate": 1.5282557206334293e-05, + "loss": 0.9745, + "step": 12050 + }, + { + "epoch": 0.24, + "learning_rate": 1.5262982754908296e-05, + "loss": 0.9371, + "step": 12100 + }, + { + "epoch": 0.24, + "learning_rate": 1.5243408303482297e-05, + "loss": 0.8983, + "step": 12150 + }, + { + "epoch": 0.24, + "learning_rate": 1.5223833852056298e-05, + "loss": 0.9373, + "step": 12200 + }, + { + "epoch": 0.24, + "learning_rate": 1.52042594006303e-05, + "loss": 0.854, + "step": 12250 + }, + { + "epoch": 0.24, + "learning_rate": 1.5184684949204301e-05, + "loss": 0.8189, + "step": 12300 + }, + { + "epoch": 0.24, + "learning_rate": 1.5165110497778302e-05, + "loss": 0.8548, + "step": 12350 + }, + { + "epoch": 0.24, + "learning_rate": 1.5145536046352302e-05, + "loss": 0.8075, + "step": 12400 + }, + { + "epoch": 0.24, + "learning_rate": 1.5125961594926303e-05, + "loss": 0.8479, + "step": 12450 + }, + { + "epoch": 0.24, + "learning_rate": 1.5106387143500305e-05, + "loss": 0.7847, + "step": 12500 + }, + { + "epoch": 0.25, + "learning_rate": 1.5086812692074306e-05, + "loss": 0.859, + "step": 12550 + }, + { + "epoch": 0.25, + "learning_rate": 1.5067238240648307e-05, + "loss": 0.9378, + "step": 12600 + }, + { + "epoch": 0.25, + "learning_rate": 1.5047663789222309e-05, + "loss": 0.911, + "step": 12650 + }, + { + "epoch": 0.25, + "learning_rate": 1.502808933779631e-05, + "loss": 0.9074, + "step": 12700 + }, + { + "epoch": 0.25, + "learning_rate": 1.5008514886370311e-05, + "loss": 0.8419, + "step": 12750 + }, + { + "epoch": 0.25, + "learning_rate": 1.4988940434944313e-05, + "loss": 0.8942, + "step": 12800 + }, + { + "epoch": 0.25, + "learning_rate": 1.4969365983518314e-05, + "loss": 0.8709, + "step": 12850 + }, + { + "epoch": 0.25, + "learning_rate": 1.4949791532092315e-05, + "loss": 0.9247, + "step": 12900 + }, + { + "epoch": 0.25, + "learning_rate": 1.4930217080666317e-05, + "loss": 0.9094, + "step": 12950 + }, + { + "epoch": 0.25, + "learning_rate": 1.4910642629240318e-05, + "loss": 0.8503, + "step": 13000 + }, + { + "epoch": 0.26, + "learning_rate": 1.4891068177814318e-05, + "loss": 0.9849, + "step": 13050 + }, + { + "epoch": 0.26, + "learning_rate": 1.4871493726388319e-05, + "loss": 0.9683, + "step": 13100 + }, + { + "epoch": 0.26, + "learning_rate": 1.485191927496232e-05, + "loss": 0.9421, + "step": 13150 + }, + { + "epoch": 0.26, + "learning_rate": 1.4832344823536322e-05, + "loss": 0.8789, + "step": 13200 + }, + { + "epoch": 0.26, + "learning_rate": 1.4812770372110323e-05, + "loss": 0.8922, + "step": 13250 + }, + { + "epoch": 0.26, + "learning_rate": 1.4793195920684324e-05, + "loss": 0.8455, + "step": 13300 + }, + { + "epoch": 0.26, + "learning_rate": 1.4773621469258326e-05, + "loss": 0.8357, + "step": 13350 + }, + { + "epoch": 0.26, + "learning_rate": 1.4754047017832327e-05, + "loss": 0.8953, + "step": 13400 + }, + { + "epoch": 0.26, + "learning_rate": 1.4734472566406328e-05, + "loss": 0.8378, + "step": 13450 + }, + { + "epoch": 0.26, + "learning_rate": 1.471489811498033e-05, + "loss": 0.8787, + "step": 13500 + }, + { + "epoch": 0.27, + "learning_rate": 1.4695323663554331e-05, + "loss": 0.8001, + "step": 13550 + }, + { + "epoch": 0.27, + "learning_rate": 1.4675749212128332e-05, + "loss": 0.9104, + "step": 13600 + }, + { + "epoch": 0.27, + "learning_rate": 1.4656174760702334e-05, + "loss": 0.8789, + "step": 13650 + }, + { + "epoch": 0.27, + "learning_rate": 1.4636600309276333e-05, + "loss": 0.8946, + "step": 13700 + }, + { + "epoch": 0.27, + "learning_rate": 1.4617025857850335e-05, + "loss": 0.9366, + "step": 13750 + }, + { + "epoch": 0.27, + "learning_rate": 1.4597451406424336e-05, + "loss": 0.9516, + "step": 13800 + }, + { + "epoch": 0.27, + "learning_rate": 1.4577876954998337e-05, + "loss": 0.9444, + "step": 13850 + }, + { + "epoch": 0.27, + "learning_rate": 1.4558302503572339e-05, + "loss": 0.8554, + "step": 13900 + }, + { + "epoch": 0.27, + "learning_rate": 1.453872805214634e-05, + "loss": 0.9004, + "step": 13950 + }, + { + "epoch": 0.27, + "learning_rate": 1.4519153600720341e-05, + "loss": 0.7618, + "step": 14000 + }, + { + "epoch": 0.28, + "learning_rate": 1.4499579149294343e-05, + "loss": 0.8806, + "step": 14050 + }, + { + "epoch": 0.28, + "learning_rate": 1.4480004697868344e-05, + "loss": 0.901, + "step": 14100 + }, + { + "epoch": 0.28, + "learning_rate": 1.4460430246442345e-05, + "loss": 0.8916, + "step": 14150 + }, + { + "epoch": 0.28, + "learning_rate": 1.4440855795016347e-05, + "loss": 0.8715, + "step": 14200 + }, + { + "epoch": 0.28, + "learning_rate": 1.4421281343590348e-05, + "loss": 0.8867, + "step": 14250 + }, + { + "epoch": 0.28, + "learning_rate": 1.440170689216435e-05, + "loss": 0.8262, + "step": 14300 + }, + { + "epoch": 0.28, + "learning_rate": 1.438213244073835e-05, + "loss": 0.8875, + "step": 14350 + }, + { + "epoch": 0.28, + "learning_rate": 1.436255798931235e-05, + "loss": 0.8602, + "step": 14400 + }, + { + "epoch": 0.28, + "learning_rate": 1.4342983537886352e-05, + "loss": 0.9087, + "step": 14450 + }, + { + "epoch": 0.28, + "learning_rate": 1.4323409086460353e-05, + "loss": 0.8778, + "step": 14500 + }, + { + "epoch": 0.28, + "learning_rate": 1.4303834635034355e-05, + "loss": 0.8652, + "step": 14550 + }, + { + "epoch": 0.29, + "learning_rate": 1.4284260183608356e-05, + "loss": 0.8563, + "step": 14600 + }, + { + "epoch": 0.29, + "learning_rate": 1.4264685732182357e-05, + "loss": 0.8385, + "step": 14650 + }, + { + "epoch": 0.29, + "learning_rate": 1.4245111280756359e-05, + "loss": 0.8856, + "step": 14700 + }, + { + "epoch": 0.29, + "learning_rate": 1.422553682933036e-05, + "loss": 0.8771, + "step": 14750 + }, + { + "epoch": 0.29, + "learning_rate": 1.4205962377904361e-05, + "loss": 0.8611, + "step": 14800 + }, + { + "epoch": 0.29, + "learning_rate": 1.4186387926478363e-05, + "loss": 0.9059, + "step": 14850 + }, + { + "epoch": 0.29, + "learning_rate": 1.4166813475052364e-05, + "loss": 0.9306, + "step": 14900 + }, + { + "epoch": 0.29, + "learning_rate": 1.4147239023626365e-05, + "loss": 0.8578, + "step": 14950 + }, + { + "epoch": 0.29, + "learning_rate": 1.4127664572200367e-05, + "loss": 0.8826, + "step": 15000 + }, + { + "epoch": 0.29, + "learning_rate": 1.4108090120774366e-05, + "loss": 0.9013, + "step": 15050 + }, + { + "epoch": 0.3, + "learning_rate": 1.4088515669348368e-05, + "loss": 0.8291, + "step": 15100 + }, + { + "epoch": 0.3, + "learning_rate": 1.4068941217922369e-05, + "loss": 0.9133, + "step": 15150 + }, + { + "epoch": 0.3, + "learning_rate": 1.404936676649637e-05, + "loss": 0.7908, + "step": 15200 + }, + { + "epoch": 0.3, + "learning_rate": 1.4029792315070372e-05, + "loss": 0.8334, + "step": 15250 + }, + { + "epoch": 0.3, + "learning_rate": 1.4010217863644373e-05, + "loss": 0.8911, + "step": 15300 + }, + { + "epoch": 0.3, + "learning_rate": 1.3990643412218374e-05, + "loss": 0.8854, + "step": 15350 + }, + { + "epoch": 0.3, + "learning_rate": 1.3971068960792376e-05, + "loss": 0.9154, + "step": 15400 + }, + { + "epoch": 0.3, + "learning_rate": 1.3951494509366377e-05, + "loss": 0.891, + "step": 15450 + }, + { + "epoch": 0.3, + "learning_rate": 1.3931920057940378e-05, + "loss": 0.7739, + "step": 15500 + }, + { + "epoch": 0.3, + "learning_rate": 1.391234560651438e-05, + "loss": 0.8708, + "step": 15550 + }, + { + "epoch": 0.31, + "learning_rate": 1.3892771155088381e-05, + "loss": 0.7749, + "step": 15600 + }, + { + "epoch": 0.31, + "learning_rate": 1.3873196703662382e-05, + "loss": 0.9595, + "step": 15650 + }, + { + "epoch": 0.31, + "learning_rate": 1.3853622252236382e-05, + "loss": 0.8669, + "step": 15700 + }, + { + "epoch": 0.31, + "learning_rate": 1.3834047800810383e-05, + "loss": 0.9046, + "step": 15750 + }, + { + "epoch": 0.31, + "learning_rate": 1.3814473349384385e-05, + "loss": 0.8351, + "step": 15800 + }, + { + "epoch": 0.31, + "learning_rate": 1.3794898897958386e-05, + "loss": 0.9234, + "step": 15850 + }, + { + "epoch": 0.31, + "learning_rate": 1.3775324446532387e-05, + "loss": 0.8685, + "step": 15900 + }, + { + "epoch": 0.31, + "learning_rate": 1.3755749995106389e-05, + "loss": 0.8342, + "step": 15950 + }, + { + "epoch": 0.31, + "learning_rate": 1.373617554368039e-05, + "loss": 0.8972, + "step": 16000 + }, + { + "epoch": 0.31, + "learning_rate": 1.3716601092254391e-05, + "loss": 0.8457, + "step": 16050 + }, + { + "epoch": 0.32, + "learning_rate": 1.3697026640828393e-05, + "loss": 0.8288, + "step": 16100 + }, + { + "epoch": 0.32, + "learning_rate": 1.3677452189402394e-05, + "loss": 0.966, + "step": 16150 + }, + { + "epoch": 0.32, + "learning_rate": 1.3657877737976395e-05, + "loss": 0.9036, + "step": 16200 + }, + { + "epoch": 0.32, + "learning_rate": 1.3638303286550397e-05, + "loss": 0.8774, + "step": 16250 + }, + { + "epoch": 0.32, + "learning_rate": 1.3618728835124398e-05, + "loss": 0.9368, + "step": 16300 + }, + { + "epoch": 0.32, + "learning_rate": 1.35991543836984e-05, + "loss": 0.8784, + "step": 16350 + }, + { + "epoch": 0.32, + "learning_rate": 1.3579579932272399e-05, + "loss": 0.8421, + "step": 16400 + }, + { + "epoch": 0.32, + "learning_rate": 1.35600054808464e-05, + "loss": 0.8555, + "step": 16450 + }, + { + "epoch": 0.32, + "learning_rate": 1.3540431029420402e-05, + "loss": 0.8573, + "step": 16500 + }, + { + "epoch": 0.32, + "learning_rate": 1.3520856577994403e-05, + "loss": 0.8638, + "step": 16550 + }, + { + "epoch": 0.32, + "learning_rate": 1.3501282126568404e-05, + "loss": 0.924, + "step": 16600 + }, + { + "epoch": 0.33, + "learning_rate": 1.3481707675142406e-05, + "loss": 0.8371, + "step": 16650 + }, + { + "epoch": 0.33, + "learning_rate": 1.3462133223716407e-05, + "loss": 0.8086, + "step": 16700 + }, + { + "epoch": 0.33, + "learning_rate": 1.3442558772290408e-05, + "loss": 0.8677, + "step": 16750 + }, + { + "epoch": 0.33, + "learning_rate": 1.342298432086441e-05, + "loss": 0.8288, + "step": 16800 + }, + { + "epoch": 0.33, + "learning_rate": 1.3403409869438411e-05, + "loss": 0.7825, + "step": 16850 + }, + { + "epoch": 0.33, + "learning_rate": 1.3383835418012412e-05, + "loss": 0.7759, + "step": 16900 + }, + { + "epoch": 0.33, + "learning_rate": 1.3364260966586414e-05, + "loss": 0.9239, + "step": 16950 + }, + { + "epoch": 0.33, + "learning_rate": 1.3344686515160415e-05, + "loss": 0.8655, + "step": 17000 + }, + { + "epoch": 0.33, + "learning_rate": 1.3325112063734415e-05, + "loss": 0.8756, + "step": 17050 + }, + { + "epoch": 0.33, + "learning_rate": 1.3305537612308416e-05, + "loss": 0.8513, + "step": 17100 + }, + { + "epoch": 0.34, + "learning_rate": 1.3285963160882417e-05, + "loss": 0.8829, + "step": 17150 + }, + { + "epoch": 0.34, + "learning_rate": 1.3266388709456419e-05, + "loss": 0.7919, + "step": 17200 + }, + { + "epoch": 0.34, + "learning_rate": 1.324681425803042e-05, + "loss": 0.7929, + "step": 17250 + }, + { + "epoch": 0.34, + "learning_rate": 1.3227239806604421e-05, + "loss": 0.9112, + "step": 17300 + }, + { + "epoch": 0.34, + "learning_rate": 1.3207665355178423e-05, + "loss": 0.8695, + "step": 17350 + }, + { + "epoch": 0.34, + "learning_rate": 1.3188090903752424e-05, + "loss": 0.8948, + "step": 17400 + }, + { + "epoch": 0.34, + "learning_rate": 1.3168516452326425e-05, + "loss": 0.8628, + "step": 17450 + }, + { + "epoch": 0.34, + "learning_rate": 1.3148942000900427e-05, + "loss": 0.848, + "step": 17500 + }, + { + "epoch": 0.34, + "learning_rate": 1.3129367549474428e-05, + "loss": 0.9755, + "step": 17550 + }, + { + "epoch": 0.34, + "learning_rate": 1.310979309804843e-05, + "loss": 0.9757, + "step": 17600 + }, + { + "epoch": 0.35, + "learning_rate": 1.3090218646622431e-05, + "loss": 0.873, + "step": 17650 + }, + { + "epoch": 0.35, + "learning_rate": 1.307064419519643e-05, + "loss": 0.8937, + "step": 17700 + }, + { + "epoch": 0.35, + "learning_rate": 1.3051069743770432e-05, + "loss": 0.8326, + "step": 17750 + }, + { + "epoch": 0.35, + "learning_rate": 1.3031495292344433e-05, + "loss": 0.8749, + "step": 17800 + }, + { + "epoch": 0.35, + "learning_rate": 1.3011920840918435e-05, + "loss": 0.8842, + "step": 17850 + }, + { + "epoch": 0.35, + "learning_rate": 1.2992346389492436e-05, + "loss": 0.8405, + "step": 17900 + }, + { + "epoch": 0.35, + "learning_rate": 1.2972771938066437e-05, + "loss": 0.8236, + "step": 17950 + }, + { + "epoch": 0.35, + "learning_rate": 1.2953197486640439e-05, + "loss": 0.9123, + "step": 18000 + }, + { + "epoch": 0.35, + "learning_rate": 1.293362303521444e-05, + "loss": 0.868, + "step": 18050 + }, + { + "epoch": 0.35, + "learning_rate": 1.2914048583788441e-05, + "loss": 0.8031, + "step": 18100 + }, + { + "epoch": 0.36, + "learning_rate": 1.2894474132362443e-05, + "loss": 0.855, + "step": 18150 + }, + { + "epoch": 0.36, + "learning_rate": 1.2874899680936444e-05, + "loss": 0.8214, + "step": 18200 + }, + { + "epoch": 0.36, + "learning_rate": 1.2855325229510445e-05, + "loss": 0.7972, + "step": 18250 + }, + { + "epoch": 0.36, + "learning_rate": 1.2835750778084447e-05, + "loss": 0.9112, + "step": 18300 + }, + { + "epoch": 0.36, + "learning_rate": 1.2816176326658448e-05, + "loss": 0.9455, + "step": 18350 + }, + { + "epoch": 0.36, + "learning_rate": 1.2796601875232448e-05, + "loss": 0.8682, + "step": 18400 + }, + { + "epoch": 0.36, + "learning_rate": 1.2777027423806449e-05, + "loss": 0.8356, + "step": 18450 + }, + { + "epoch": 0.36, + "learning_rate": 1.275745297238045e-05, + "loss": 0.9152, + "step": 18500 + }, + { + "epoch": 0.36, + "learning_rate": 1.2737878520954452e-05, + "loss": 0.8404, + "step": 18550 + }, + { + "epoch": 0.36, + "learning_rate": 1.2718304069528453e-05, + "loss": 0.8722, + "step": 18600 + }, + { + "epoch": 0.37, + "learning_rate": 1.2698729618102454e-05, + "loss": 0.7857, + "step": 18650 + }, + { + "epoch": 0.37, + "learning_rate": 1.2679155166676456e-05, + "loss": 0.9105, + "step": 18700 + }, + { + "epoch": 0.37, + "learning_rate": 1.2659580715250457e-05, + "loss": 0.8208, + "step": 18750 + }, + { + "epoch": 0.37, + "learning_rate": 1.2640006263824458e-05, + "loss": 0.8793, + "step": 18800 + }, + { + "epoch": 0.37, + "learning_rate": 1.262043181239846e-05, + "loss": 0.8134, + "step": 18850 + }, + { + "epoch": 0.37, + "learning_rate": 1.2600857360972461e-05, + "loss": 0.9198, + "step": 18900 + }, + { + "epoch": 0.37, + "learning_rate": 1.2581282909546462e-05, + "loss": 0.8148, + "step": 18950 + }, + { + "epoch": 0.37, + "learning_rate": 1.2561708458120464e-05, + "loss": 0.8317, + "step": 19000 + }, + { + "epoch": 0.37, + "learning_rate": 1.2542134006694463e-05, + "loss": 0.8664, + "step": 19050 + }, + { + "epoch": 0.37, + "learning_rate": 1.2522559555268465e-05, + "loss": 0.9378, + "step": 19100 + }, + { + "epoch": 0.37, + "learning_rate": 1.2502985103842466e-05, + "loss": 0.8184, + "step": 19150 + }, + { + "epoch": 0.38, + "learning_rate": 1.2483410652416467e-05, + "loss": 0.8996, + "step": 19200 + }, + { + "epoch": 0.38, + "learning_rate": 1.2463836200990469e-05, + "loss": 0.9632, + "step": 19250 + }, + { + "epoch": 0.38, + "learning_rate": 1.244426174956447e-05, + "loss": 0.8904, + "step": 19300 + }, + { + "epoch": 0.38, + "learning_rate": 1.2424687298138471e-05, + "loss": 0.8046, + "step": 19350 + }, + { + "epoch": 0.38, + "learning_rate": 1.2405112846712473e-05, + "loss": 0.8562, + "step": 19400 + }, + { + "epoch": 0.38, + "learning_rate": 1.2385538395286474e-05, + "loss": 0.8907, + "step": 19450 + }, + { + "epoch": 0.38, + "learning_rate": 1.2365963943860475e-05, + "loss": 0.8435, + "step": 19500 + }, + { + "epoch": 0.38, + "learning_rate": 1.2346389492434477e-05, + "loss": 0.8589, + "step": 19550 + }, + { + "epoch": 0.38, + "learning_rate": 1.2326815041008478e-05, + "loss": 0.9204, + "step": 19600 + }, + { + "epoch": 0.38, + "learning_rate": 1.230724058958248e-05, + "loss": 0.886, + "step": 19650 + }, + { + "epoch": 0.39, + "learning_rate": 1.228766613815648e-05, + "loss": 0.8723, + "step": 19700 + }, + { + "epoch": 0.39, + "learning_rate": 1.226809168673048e-05, + "loss": 0.9037, + "step": 19750 + }, + { + "epoch": 0.39, + "learning_rate": 1.2248517235304482e-05, + "loss": 0.8555, + "step": 19800 + }, + { + "epoch": 0.39, + "learning_rate": 1.2228942783878483e-05, + "loss": 0.8601, + "step": 19850 + }, + { + "epoch": 0.39, + "learning_rate": 1.2209368332452484e-05, + "loss": 0.818, + "step": 19900 + }, + { + "epoch": 0.39, + "learning_rate": 1.2189793881026486e-05, + "loss": 0.8717, + "step": 19950 + }, + { + "epoch": 0.39, + "learning_rate": 1.2170219429600487e-05, + "loss": 0.8883, + "step": 20000 + }, + { + "epoch": 0.39, + "learning_rate": 1.2150644978174488e-05, + "loss": 0.814, + "step": 20050 + }, + { + "epoch": 0.39, + "learning_rate": 1.213107052674849e-05, + "loss": 0.9047, + "step": 20100 + }, + { + "epoch": 0.39, + "learning_rate": 1.2111496075322491e-05, + "loss": 0.9227, + "step": 20150 + }, + { + "epoch": 0.4, + "learning_rate": 1.2091921623896492e-05, + "loss": 0.971, + "step": 20200 + }, + { + "epoch": 0.4, + "learning_rate": 1.2072347172470494e-05, + "loss": 0.8127, + "step": 20250 + }, + { + "epoch": 0.4, + "learning_rate": 1.2052772721044495e-05, + "loss": 0.8432, + "step": 20300 + }, + { + "epoch": 0.4, + "learning_rate": 1.2033198269618496e-05, + "loss": 0.8525, + "step": 20350 + }, + { + "epoch": 0.4, + "learning_rate": 1.2013623818192496e-05, + "loss": 0.8678, + "step": 20400 + }, + { + "epoch": 0.4, + "learning_rate": 1.1994049366766497e-05, + "loss": 0.8416, + "step": 20450 + }, + { + "epoch": 0.4, + "learning_rate": 1.1974474915340499e-05, + "loss": 0.8313, + "step": 20500 + }, + { + "epoch": 0.4, + "learning_rate": 1.19549004639145e-05, + "loss": 0.8867, + "step": 20550 + }, + { + "epoch": 0.4, + "learning_rate": 1.1935326012488501e-05, + "loss": 0.8573, + "step": 20600 + }, + { + "epoch": 0.4, + "learning_rate": 1.1915751561062503e-05, + "loss": 0.8349, + "step": 20650 + }, + { + "epoch": 0.41, + "learning_rate": 1.1896177109636504e-05, + "loss": 0.8767, + "step": 20700 + }, + { + "epoch": 0.41, + "learning_rate": 1.1876602658210506e-05, + "loss": 0.9189, + "step": 20750 + }, + { + "epoch": 0.41, + "learning_rate": 1.1857028206784507e-05, + "loss": 0.8946, + "step": 20800 + }, + { + "epoch": 0.41, + "learning_rate": 1.1837453755358508e-05, + "loss": 0.7432, + "step": 20850 + }, + { + "epoch": 0.41, + "learning_rate": 1.181787930393251e-05, + "loss": 0.9358, + "step": 20900 + }, + { + "epoch": 0.41, + "learning_rate": 1.1798304852506511e-05, + "loss": 0.8968, + "step": 20950 + }, + { + "epoch": 0.41, + "learning_rate": 1.1778730401080512e-05, + "loss": 0.7498, + "step": 21000 + }, + { + "epoch": 0.41, + "learning_rate": 1.1759155949654512e-05, + "loss": 0.8436, + "step": 21050 + }, + { + "epoch": 0.41, + "learning_rate": 1.1739581498228513e-05, + "loss": 0.8336, + "step": 21100 + }, + { + "epoch": 0.41, + "learning_rate": 1.1720007046802515e-05, + "loss": 0.8154, + "step": 21150 + }, + { + "epoch": 0.41, + "learning_rate": 1.1700432595376516e-05, + "loss": 0.8352, + "step": 21200 + }, + { + "epoch": 0.42, + "learning_rate": 1.1680858143950517e-05, + "loss": 0.84, + "step": 21250 + }, + { + "epoch": 0.42, + "learning_rate": 1.1661283692524519e-05, + "loss": 0.9231, + "step": 21300 + }, + { + "epoch": 0.42, + "learning_rate": 1.164170924109852e-05, + "loss": 0.8819, + "step": 21350 + }, + { + "epoch": 0.42, + "learning_rate": 1.1622134789672521e-05, + "loss": 0.8829, + "step": 21400 + }, + { + "epoch": 0.42, + "learning_rate": 1.1602560338246523e-05, + "loss": 0.9224, + "step": 21450 + }, + { + "epoch": 0.42, + "learning_rate": 1.1582985886820524e-05, + "loss": 0.8652, + "step": 21500 + }, + { + "epoch": 0.42, + "learning_rate": 1.1563411435394525e-05, + "loss": 0.8843, + "step": 21550 + }, + { + "epoch": 0.42, + "learning_rate": 1.1543836983968527e-05, + "loss": 0.8513, + "step": 21600 + }, + { + "epoch": 0.42, + "learning_rate": 1.1524262532542528e-05, + "loss": 0.9332, + "step": 21650 + }, + { + "epoch": 0.42, + "learning_rate": 1.150468808111653e-05, + "loss": 0.8782, + "step": 21700 + }, + { + "epoch": 0.43, + "learning_rate": 1.1485113629690529e-05, + "loss": 0.8213, + "step": 21750 + }, + { + "epoch": 0.43, + "learning_rate": 1.146553917826453e-05, + "loss": 0.9054, + "step": 21800 + }, + { + "epoch": 0.43, + "learning_rate": 1.1445964726838532e-05, + "loss": 0.913, + "step": 21850 + }, + { + "epoch": 0.43, + "learning_rate": 1.1426390275412533e-05, + "loss": 0.9431, + "step": 21900 + }, + { + "epoch": 0.43, + "learning_rate": 1.1406815823986534e-05, + "loss": 0.889, + "step": 21950 + }, + { + "epoch": 0.43, + "learning_rate": 1.1387241372560536e-05, + "loss": 0.8428, + "step": 22000 + }, + { + "epoch": 0.43, + "learning_rate": 1.1367666921134537e-05, + "loss": 0.8754, + "step": 22050 + }, + { + "epoch": 0.43, + "learning_rate": 1.1348092469708538e-05, + "loss": 0.8986, + "step": 22100 + }, + { + "epoch": 0.43, + "learning_rate": 1.132851801828254e-05, + "loss": 0.7835, + "step": 22150 + }, + { + "epoch": 0.43, + "learning_rate": 1.1308943566856541e-05, + "loss": 0.9178, + "step": 22200 + }, + { + "epoch": 0.44, + "learning_rate": 1.1289369115430542e-05, + "loss": 0.8806, + "step": 22250 + }, + { + "epoch": 0.44, + "learning_rate": 1.1269794664004544e-05, + "loss": 0.8896, + "step": 22300 + }, + { + "epoch": 0.44, + "learning_rate": 1.1250220212578545e-05, + "loss": 0.8652, + "step": 22350 + }, + { + "epoch": 0.44, + "learning_rate": 1.1230645761152545e-05, + "loss": 0.959, + "step": 22400 + }, + { + "epoch": 0.44, + "learning_rate": 1.1211071309726546e-05, + "loss": 0.9101, + "step": 22450 + }, + { + "epoch": 0.44, + "learning_rate": 1.1191496858300547e-05, + "loss": 0.9111, + "step": 22500 + }, + { + "epoch": 0.44, + "learning_rate": 1.1171922406874549e-05, + "loss": 0.8679, + "step": 22550 + }, + { + "epoch": 0.44, + "learning_rate": 1.115234795544855e-05, + "loss": 0.9192, + "step": 22600 + }, + { + "epoch": 0.44, + "learning_rate": 1.1132773504022551e-05, + "loss": 0.869, + "step": 22650 + }, + { + "epoch": 0.44, + "learning_rate": 1.1113199052596553e-05, + "loss": 0.8669, + "step": 22700 + }, + { + "epoch": 0.45, + "learning_rate": 1.1093624601170554e-05, + "loss": 0.7818, + "step": 22750 + }, + { + "epoch": 0.45, + "learning_rate": 1.1074050149744555e-05, + "loss": 0.8594, + "step": 22800 + }, + { + "epoch": 0.45, + "learning_rate": 1.1054475698318557e-05, + "loss": 0.8431, + "step": 22850 + }, + { + "epoch": 0.45, + "learning_rate": 1.1034901246892558e-05, + "loss": 0.8806, + "step": 22900 + }, + { + "epoch": 0.45, + "learning_rate": 1.101532679546656e-05, + "loss": 0.9083, + "step": 22950 + }, + { + "epoch": 0.45, + "learning_rate": 1.099575234404056e-05, + "loss": 0.8489, + "step": 23000 + }, + { + "epoch": 0.45, + "learning_rate": 1.097617789261456e-05, + "loss": 0.9215, + "step": 23050 + }, + { + "epoch": 0.45, + "learning_rate": 1.0956603441188562e-05, + "loss": 0.8501, + "step": 23100 + }, + { + "epoch": 0.45, + "learning_rate": 1.0937028989762563e-05, + "loss": 0.8669, + "step": 23150 + }, + { + "epoch": 0.45, + "learning_rate": 1.0917454538336564e-05, + "loss": 0.8635, + "step": 23200 + }, + { + "epoch": 0.46, + "learning_rate": 1.0897880086910566e-05, + "loss": 0.8314, + "step": 23250 + }, + { + "epoch": 0.46, + "learning_rate": 1.0878305635484567e-05, + "loss": 0.9476, + "step": 23300 + }, + { + "epoch": 0.46, + "learning_rate": 1.0858731184058568e-05, + "loss": 0.9073, + "step": 23350 + }, + { + "epoch": 0.46, + "learning_rate": 1.083915673263257e-05, + "loss": 0.8649, + "step": 23400 + }, + { + "epoch": 0.46, + "learning_rate": 1.0819582281206571e-05, + "loss": 0.8369, + "step": 23450 + }, + { + "epoch": 0.46, + "learning_rate": 1.0800007829780572e-05, + "loss": 0.8857, + "step": 23500 + }, + { + "epoch": 0.46, + "learning_rate": 1.0780433378354574e-05, + "loss": 0.7556, + "step": 23550 + }, + { + "epoch": 0.46, + "learning_rate": 1.0760858926928575e-05, + "loss": 0.8351, + "step": 23600 + }, + { + "epoch": 0.46, + "learning_rate": 1.0741284475502576e-05, + "loss": 0.8619, + "step": 23650 + }, + { + "epoch": 0.46, + "learning_rate": 1.0721710024076578e-05, + "loss": 0.8475, + "step": 23700 + }, + { + "epoch": 0.46, + "learning_rate": 1.0702135572650577e-05, + "loss": 0.8963, + "step": 23750 + }, + { + "epoch": 0.47, + "learning_rate": 1.0682561121224579e-05, + "loss": 0.8072, + "step": 23800 + }, + { + "epoch": 0.47, + "learning_rate": 1.066298666979858e-05, + "loss": 0.7855, + "step": 23850 + }, + { + "epoch": 0.47, + "learning_rate": 1.0643412218372582e-05, + "loss": 0.9233, + "step": 23900 + }, + { + "epoch": 0.47, + "learning_rate": 1.0623837766946583e-05, + "loss": 0.8856, + "step": 23950 + }, + { + "epoch": 0.47, + "learning_rate": 1.0604263315520584e-05, + "loss": 0.8831, + "step": 24000 + }, + { + "epoch": 0.47, + "learning_rate": 1.0584688864094586e-05, + "loss": 0.7711, + "step": 24050 + }, + { + "epoch": 0.47, + "learning_rate": 1.0565114412668587e-05, + "loss": 0.8804, + "step": 24100 + }, + { + "epoch": 0.47, + "learning_rate": 1.0545539961242588e-05, + "loss": 0.8166, + "step": 24150 + }, + { + "epoch": 0.47, + "learning_rate": 1.052596550981659e-05, + "loss": 0.88, + "step": 24200 + }, + { + "epoch": 0.47, + "learning_rate": 1.0506391058390591e-05, + "loss": 0.8698, + "step": 24250 + }, + { + "epoch": 0.48, + "learning_rate": 1.0486816606964592e-05, + "loss": 0.8684, + "step": 24300 + }, + { + "epoch": 0.48, + "learning_rate": 1.0467242155538594e-05, + "loss": 0.93, + "step": 24350 + }, + { + "epoch": 0.48, + "learning_rate": 1.0447667704112593e-05, + "loss": 0.8597, + "step": 24400 + }, + { + "epoch": 0.48, + "learning_rate": 1.0428093252686595e-05, + "loss": 0.9301, + "step": 24450 + }, + { + "epoch": 0.48, + "learning_rate": 1.0408518801260596e-05, + "loss": 0.8598, + "step": 24500 + }, + { + "epoch": 0.48, + "learning_rate": 1.0388944349834597e-05, + "loss": 0.9186, + "step": 24550 + }, + { + "epoch": 0.48, + "learning_rate": 1.0369369898408599e-05, + "loss": 0.8626, + "step": 24600 + }, + { + "epoch": 0.48, + "learning_rate": 1.03497954469826e-05, + "loss": 0.8402, + "step": 24650 + }, + { + "epoch": 0.48, + "learning_rate": 1.0330220995556601e-05, + "loss": 0.8344, + "step": 24700 + }, + { + "epoch": 0.48, + "learning_rate": 1.0310646544130603e-05, + "loss": 0.7338, + "step": 24750 + }, + { + "epoch": 0.49, + "learning_rate": 1.0291072092704604e-05, + "loss": 0.924, + "step": 24800 + }, + { + "epoch": 0.49, + "learning_rate": 1.0271497641278605e-05, + "loss": 0.8656, + "step": 24850 + }, + { + "epoch": 0.49, + "learning_rate": 1.0251923189852607e-05, + "loss": 0.8263, + "step": 24900 + }, + { + "epoch": 0.49, + "learning_rate": 1.0232348738426608e-05, + "loss": 0.8556, + "step": 24950 + }, + { + "epoch": 0.49, + "learning_rate": 1.021277428700061e-05, + "loss": 0.8331, + "step": 25000 + }, + { + "epoch": 0.49, + "learning_rate": 1.0193199835574609e-05, + "loss": 0.7991, + "step": 25050 + }, + { + "epoch": 0.49, + "learning_rate": 1.017362538414861e-05, + "loss": 0.7974, + "step": 25100 + }, + { + "epoch": 0.49, + "learning_rate": 1.0154050932722612e-05, + "loss": 0.8658, + "step": 25150 + }, + { + "epoch": 0.49, + "learning_rate": 1.0134476481296613e-05, + "loss": 0.8685, + "step": 25200 + }, + { + "epoch": 0.49, + "learning_rate": 1.0114902029870614e-05, + "loss": 0.9491, + "step": 25250 + }, + { + "epoch": 0.5, + "learning_rate": 1.0095327578444616e-05, + "loss": 0.8939, + "step": 25300 + }, + { + "epoch": 0.5, + "learning_rate": 1.0075753127018617e-05, + "loss": 0.8762, + "step": 25350 + }, + { + "epoch": 0.5, + "learning_rate": 1.0056178675592618e-05, + "loss": 0.8753, + "step": 25400 + }, + { + "epoch": 0.5, + "learning_rate": 1.003660422416662e-05, + "loss": 0.7322, + "step": 25450 + }, + { + "epoch": 0.5, + "learning_rate": 1.0017029772740621e-05, + "loss": 0.9321, + "step": 25500 + }, + { + "epoch": 0.5, + "learning_rate": 9.99745532131462e-06, + "loss": 0.8506, + "step": 25550 + }, + { + "epoch": 0.5, + "learning_rate": 9.977880869888622e-06, + "loss": 0.8747, + "step": 25600 + }, + { + "epoch": 0.5, + "learning_rate": 9.958306418462623e-06, + "loss": 0.8376, + "step": 25650 + }, + { + "epoch": 0.5, + "learning_rate": 9.938731967036625e-06, + "loss": 0.8633, + "step": 25700 + }, + { + "epoch": 0.5, + "learning_rate": 9.919157515610626e-06, + "loss": 0.843, + "step": 25750 + }, + { + "epoch": 0.51, + "learning_rate": 9.899583064184627e-06, + "loss": 0.9029, + "step": 25800 + }, + { + "epoch": 0.51, + "learning_rate": 9.880008612758629e-06, + "loss": 0.9153, + "step": 25850 + }, + { + "epoch": 0.51, + "learning_rate": 9.86043416133263e-06, + "loss": 0.8513, + "step": 25900 + }, + { + "epoch": 0.51, + "learning_rate": 9.84085970990663e-06, + "loss": 0.8539, + "step": 25950 + }, + { + "epoch": 0.51, + "learning_rate": 9.821285258480631e-06, + "loss": 0.8501, + "step": 26000 + }, + { + "epoch": 0.51, + "learning_rate": 9.801710807054632e-06, + "loss": 0.931, + "step": 26050 + }, + { + "epoch": 0.51, + "learning_rate": 9.782136355628634e-06, + "loss": 0.8488, + "step": 26100 + }, + { + "epoch": 0.51, + "learning_rate": 9.762561904202635e-06, + "loss": 0.9098, + "step": 26150 + }, + { + "epoch": 0.51, + "learning_rate": 9.742987452776636e-06, + "loss": 0.8358, + "step": 26200 + }, + { + "epoch": 0.51, + "learning_rate": 9.723413001350638e-06, + "loss": 0.8837, + "step": 26250 + }, + { + "epoch": 0.51, + "learning_rate": 9.703838549924639e-06, + "loss": 0.7796, + "step": 26300 + }, + { + "epoch": 0.52, + "learning_rate": 9.68426409849864e-06, + "loss": 0.8227, + "step": 26350 + }, + { + "epoch": 0.52, + "learning_rate": 9.664689647072642e-06, + "loss": 0.8286, + "step": 26400 + }, + { + "epoch": 0.52, + "learning_rate": 9.645115195646643e-06, + "loss": 0.9235, + "step": 26450 + }, + { + "epoch": 0.52, + "learning_rate": 9.625540744220644e-06, + "loss": 0.845, + "step": 26500 + }, + { + "epoch": 0.52, + "learning_rate": 9.605966292794646e-06, + "loss": 0.7694, + "step": 26550 + }, + { + "epoch": 0.52, + "learning_rate": 9.586391841368647e-06, + "loss": 0.9169, + "step": 26600 + }, + { + "epoch": 0.52, + "learning_rate": 9.566817389942647e-06, + "loss": 0.9425, + "step": 26650 + }, + { + "epoch": 0.52, + "learning_rate": 9.547242938516648e-06, + "loss": 0.7728, + "step": 26700 + }, + { + "epoch": 0.52, + "learning_rate": 9.52766848709065e-06, + "loss": 0.9274, + "step": 26750 + }, + { + "epoch": 0.52, + "learning_rate": 9.50809403566465e-06, + "loss": 0.8754, + "step": 26800 + }, + { + "epoch": 0.53, + "learning_rate": 9.488519584238652e-06, + "loss": 0.8206, + "step": 26850 + }, + { + "epoch": 0.53, + "learning_rate": 9.468945132812653e-06, + "loss": 0.8349, + "step": 26900 + }, + { + "epoch": 0.53, + "learning_rate": 9.449370681386655e-06, + "loss": 0.7908, + "step": 26950 + }, + { + "epoch": 0.53, + "learning_rate": 9.429796229960656e-06, + "loss": 0.776, + "step": 27000 + }, + { + "epoch": 0.53, + "learning_rate": 9.410221778534658e-06, + "loss": 0.8717, + "step": 27050 + }, + { + "epoch": 0.53, + "learning_rate": 9.390647327108659e-06, + "loss": 0.8673, + "step": 27100 + }, + { + "epoch": 0.53, + "learning_rate": 9.37107287568266e-06, + "loss": 0.8936, + "step": 27150 + }, + { + "epoch": 0.53, + "learning_rate": 9.351498424256662e-06, + "loss": 0.8782, + "step": 27200 + }, + { + "epoch": 0.53, + "learning_rate": 9.331923972830663e-06, + "loss": 0.7767, + "step": 27250 + }, + { + "epoch": 0.53, + "learning_rate": 9.312349521404663e-06, + "loss": 0.8948, + "step": 27300 + }, + { + "epoch": 0.54, + "learning_rate": 9.292775069978664e-06, + "loss": 0.8819, + "step": 27350 + }, + { + "epoch": 0.54, + "learning_rate": 9.273200618552665e-06, + "loss": 0.8244, + "step": 27400 + }, + { + "epoch": 0.54, + "learning_rate": 9.253626167126667e-06, + "loss": 0.8453, + "step": 27450 + }, + { + "epoch": 0.54, + "learning_rate": 9.234051715700668e-06, + "loss": 0.8921, + "step": 27500 + }, + { + "epoch": 0.54, + "learning_rate": 9.21447726427467e-06, + "loss": 0.7972, + "step": 27550 + }, + { + "epoch": 0.54, + "learning_rate": 9.19490281284867e-06, + "loss": 0.8651, + "step": 27600 + }, + { + "epoch": 0.54, + "learning_rate": 9.175328361422672e-06, + "loss": 0.9117, + "step": 27650 + }, + { + "epoch": 0.54, + "learning_rate": 9.155753909996673e-06, + "loss": 0.8853, + "step": 27700 + }, + { + "epoch": 0.54, + "learning_rate": 9.136179458570675e-06, + "loss": 0.8501, + "step": 27750 + }, + { + "epoch": 0.54, + "learning_rate": 9.116605007144676e-06, + "loss": 0.8758, + "step": 27800 + }, + { + "epoch": 0.55, + "learning_rate": 9.097030555718677e-06, + "loss": 0.846, + "step": 27850 + }, + { + "epoch": 0.55, + "learning_rate": 9.077456104292679e-06, + "loss": 0.9889, + "step": 27900 + }, + { + "epoch": 0.55, + "learning_rate": 9.057881652866678e-06, + "loss": 0.8388, + "step": 27950 + }, + { + "epoch": 0.55, + "learning_rate": 9.03830720144068e-06, + "loss": 0.8852, + "step": 28000 + }, + { + "epoch": 0.55, + "learning_rate": 9.018732750014681e-06, + "loss": 0.8383, + "step": 28050 + }, + { + "epoch": 0.55, + "learning_rate": 8.999158298588682e-06, + "loss": 0.8078, + "step": 28100 + }, + { + "epoch": 0.55, + "learning_rate": 8.979583847162684e-06, + "loss": 0.8822, + "step": 28150 + }, + { + "epoch": 0.55, + "learning_rate": 8.960009395736685e-06, + "loss": 0.8824, + "step": 28200 + }, + { + "epoch": 0.55, + "learning_rate": 8.940434944310686e-06, + "loss": 0.8647, + "step": 28250 + }, + { + "epoch": 0.55, + "learning_rate": 8.920860492884688e-06, + "loss": 0.8348, + "step": 28300 + }, + { + "epoch": 0.55, + "learning_rate": 8.901286041458689e-06, + "loss": 0.7925, + "step": 28350 + }, + { + "epoch": 0.56, + "learning_rate": 8.88171159003269e-06, + "loss": 0.9533, + "step": 28400 + }, + { + "epoch": 0.56, + "learning_rate": 8.862137138606692e-06, + "loss": 0.8684, + "step": 28450 + }, + { + "epoch": 0.56, + "learning_rate": 8.842562687180693e-06, + "loss": 0.8617, + "step": 28500 + }, + { + "epoch": 0.56, + "learning_rate": 8.822988235754694e-06, + "loss": 0.8674, + "step": 28550 + }, + { + "epoch": 0.56, + "learning_rate": 8.803413784328696e-06, + "loss": 0.8993, + "step": 28600 + }, + { + "epoch": 0.56, + "learning_rate": 8.783839332902695e-06, + "loss": 0.8651, + "step": 28650 + }, + { + "epoch": 0.56, + "learning_rate": 8.764264881476697e-06, + "loss": 0.9059, + "step": 28700 + }, + { + "epoch": 0.56, + "learning_rate": 8.744690430050698e-06, + "loss": 0.8669, + "step": 28750 + }, + { + "epoch": 0.56, + "learning_rate": 8.7251159786247e-06, + "loss": 0.8995, + "step": 28800 + }, + { + "epoch": 0.56, + "learning_rate": 8.7055415271987e-06, + "loss": 0.7663, + "step": 28850 + }, + { + "epoch": 0.57, + "learning_rate": 8.685967075772702e-06, + "loss": 0.9201, + "step": 28900 + }, + { + "epoch": 0.57, + "learning_rate": 8.666392624346703e-06, + "loss": 0.8251, + "step": 28950 + }, + { + "epoch": 0.57, + "learning_rate": 8.646818172920705e-06, + "loss": 0.8798, + "step": 29000 + }, + { + "epoch": 0.57, + "learning_rate": 8.627243721494706e-06, + "loss": 0.8492, + "step": 29050 + }, + { + "epoch": 0.57, + "learning_rate": 8.607669270068707e-06, + "loss": 0.8379, + "step": 29100 + }, + { + "epoch": 0.57, + "learning_rate": 8.588094818642709e-06, + "loss": 0.8279, + "step": 29150 + }, + { + "epoch": 0.57, + "learning_rate": 8.56852036721671e-06, + "loss": 0.9321, + "step": 29200 + }, + { + "epoch": 0.57, + "learning_rate": 8.548945915790711e-06, + "loss": 0.8259, + "step": 29250 + }, + { + "epoch": 0.57, + "learning_rate": 8.529371464364711e-06, + "loss": 0.945, + "step": 29300 + }, + { + "epoch": 0.57, + "learning_rate": 8.509797012938712e-06, + "loss": 0.8367, + "step": 29350 + }, + { + "epoch": 0.58, + "learning_rate": 8.490222561512714e-06, + "loss": 0.7596, + "step": 29400 + }, + { + "epoch": 0.58, + "learning_rate": 8.470648110086715e-06, + "loss": 0.9031, + "step": 29450 + }, + { + "epoch": 0.58, + "learning_rate": 8.451073658660716e-06, + "loss": 0.9436, + "step": 29500 + }, + { + "epoch": 0.58, + "learning_rate": 8.431499207234718e-06, + "loss": 0.8308, + "step": 29550 + }, + { + "epoch": 0.58, + "learning_rate": 8.411924755808719e-06, + "loss": 0.7965, + "step": 29600 + }, + { + "epoch": 0.58, + "learning_rate": 8.39235030438272e-06, + "loss": 0.8116, + "step": 29650 + }, + { + "epoch": 0.58, + "learning_rate": 8.372775852956722e-06, + "loss": 0.894, + "step": 29700 + }, + { + "epoch": 0.58, + "learning_rate": 8.353201401530723e-06, + "loss": 0.8788, + "step": 29750 + }, + { + "epoch": 0.58, + "learning_rate": 8.333626950104724e-06, + "loss": 0.8463, + "step": 29800 + }, + { + "epoch": 0.58, + "learning_rate": 8.314052498678726e-06, + "loss": 0.7681, + "step": 29850 + }, + { + "epoch": 0.59, + "learning_rate": 8.294478047252727e-06, + "loss": 0.8351, + "step": 29900 + }, + { + "epoch": 0.59, + "learning_rate": 8.274903595826728e-06, + "loss": 0.8549, + "step": 29950 + }, + { + "epoch": 0.59, + "learning_rate": 8.255329144400728e-06, + "loss": 0.9201, + "step": 30000 + }, + { + "epoch": 0.59, + "learning_rate": 8.23575469297473e-06, + "loss": 0.8351, + "step": 30050 + }, + { + "epoch": 0.59, + "learning_rate": 8.21618024154873e-06, + "loss": 0.8675, + "step": 30100 + }, + { + "epoch": 0.59, + "learning_rate": 8.196605790122732e-06, + "loss": 0.7949, + "step": 30150 + }, + { + "epoch": 0.59, + "learning_rate": 8.177031338696733e-06, + "loss": 0.8228, + "step": 30200 + }, + { + "epoch": 0.59, + "learning_rate": 8.157456887270735e-06, + "loss": 0.861, + "step": 30250 + }, + { + "epoch": 0.59, + "learning_rate": 8.137882435844736e-06, + "loss": 0.9081, + "step": 30300 + }, + { + "epoch": 0.59, + "learning_rate": 8.118307984418738e-06, + "loss": 0.7959, + "step": 30350 + }, + { + "epoch": 0.6, + "learning_rate": 8.098733532992739e-06, + "loss": 0.8647, + "step": 30400 + }, + { + "epoch": 0.6, + "learning_rate": 8.07915908156674e-06, + "loss": 0.8884, + "step": 30450 + }, + { + "epoch": 0.6, + "learning_rate": 8.059584630140742e-06, + "loss": 0.7761, + "step": 30500 + }, + { + "epoch": 0.6, + "learning_rate": 8.040010178714743e-06, + "loss": 0.9423, + "step": 30550 + }, + { + "epoch": 0.6, + "learning_rate": 8.020435727288744e-06, + "loss": 0.8885, + "step": 30600 + }, + { + "epoch": 0.6, + "learning_rate": 8.000861275862744e-06, + "loss": 0.9188, + "step": 30650 + }, + { + "epoch": 0.6, + "learning_rate": 7.981286824436745e-06, + "loss": 0.7685, + "step": 30700 + }, + { + "epoch": 0.6, + "learning_rate": 7.961712373010747e-06, + "loss": 0.8876, + "step": 30750 + }, + { + "epoch": 0.6, + "learning_rate": 7.942137921584748e-06, + "loss": 0.8314, + "step": 30800 + }, + { + "epoch": 0.6, + "learning_rate": 7.92256347015875e-06, + "loss": 0.963, + "step": 30850 + }, + { + "epoch": 0.6, + "learning_rate": 7.90298901873275e-06, + "loss": 0.8584, + "step": 30900 + }, + { + "epoch": 0.61, + "learning_rate": 7.883414567306752e-06, + "loss": 0.8097, + "step": 30950 + }, + { + "epoch": 0.61, + "learning_rate": 7.863840115880753e-06, + "loss": 0.868, + "step": 31000 + }, + { + "epoch": 0.61, + "learning_rate": 7.844265664454755e-06, + "loss": 0.8494, + "step": 31050 + }, + { + "epoch": 0.61, + "learning_rate": 7.824691213028756e-06, + "loss": 0.7916, + "step": 31100 + }, + { + "epoch": 0.61, + "learning_rate": 7.805116761602757e-06, + "loss": 0.9308, + "step": 31150 + }, + { + "epoch": 0.61, + "learning_rate": 7.785542310176759e-06, + "loss": 0.7219, + "step": 31200 + }, + { + "epoch": 0.61, + "learning_rate": 7.76596785875076e-06, + "loss": 0.8988, + "step": 31250 + }, + { + "epoch": 0.61, + "learning_rate": 7.74639340732476e-06, + "loss": 0.7423, + "step": 31300 + }, + { + "epoch": 0.61, + "learning_rate": 7.726818955898761e-06, + "loss": 0.7179, + "step": 31350 + }, + { + "epoch": 0.61, + "learning_rate": 7.707244504472762e-06, + "loss": 0.9416, + "step": 31400 + }, + { + "epoch": 0.62, + "learning_rate": 7.687670053046764e-06, + "loss": 0.8769, + "step": 31450 + }, + { + "epoch": 0.62, + "learning_rate": 7.668095601620765e-06, + "loss": 0.992, + "step": 31500 + }, + { + "epoch": 0.62, + "learning_rate": 7.648521150194766e-06, + "loss": 0.8055, + "step": 31550 + }, + { + "epoch": 0.62, + "learning_rate": 7.628946698768768e-06, + "loss": 0.8286, + "step": 31600 + }, + { + "epoch": 0.62, + "learning_rate": 7.609372247342769e-06, + "loss": 0.7925, + "step": 31650 + }, + { + "epoch": 0.62, + "learning_rate": 7.58979779591677e-06, + "loss": 0.812, + "step": 31700 + }, + { + "epoch": 0.62, + "learning_rate": 7.570223344490772e-06, + "loss": 0.8058, + "step": 31750 + }, + { + "epoch": 0.62, + "learning_rate": 7.550648893064772e-06, + "loss": 0.8939, + "step": 31800 + }, + { + "epoch": 0.62, + "learning_rate": 7.5310744416387735e-06, + "loss": 0.9144, + "step": 31850 + }, + { + "epoch": 0.62, + "learning_rate": 7.511499990212775e-06, + "loss": 0.856, + "step": 31900 + }, + { + "epoch": 0.63, + "learning_rate": 7.491925538786776e-06, + "loss": 0.8356, + "step": 31950 + }, + { + "epoch": 0.63, + "learning_rate": 7.4723510873607775e-06, + "loss": 0.8551, + "step": 32000 + }, + { + "epoch": 0.63, + "learning_rate": 7.452776635934779e-06, + "loss": 0.8674, + "step": 32050 + }, + { + "epoch": 0.63, + "learning_rate": 7.43320218450878e-06, + "loss": 0.8289, + "step": 32100 + }, + { + "epoch": 0.63, + "learning_rate": 7.413627733082781e-06, + "loss": 0.9137, + "step": 32150 + }, + { + "epoch": 0.63, + "learning_rate": 7.394053281656782e-06, + "loss": 0.8694, + "step": 32200 + }, + { + "epoch": 0.63, + "learning_rate": 7.374478830230783e-06, + "loss": 0.8175, + "step": 32250 + }, + { + "epoch": 0.63, + "learning_rate": 7.354904378804785e-06, + "loss": 0.8521, + "step": 32300 + }, + { + "epoch": 0.63, + "learning_rate": 7.335329927378786e-06, + "loss": 0.8094, + "step": 32350 + }, + { + "epoch": 0.63, + "learning_rate": 7.315755475952787e-06, + "loss": 0.8094, + "step": 32400 + }, + { + "epoch": 0.64, + "learning_rate": 7.296181024526789e-06, + "loss": 0.8146, + "step": 32450 + }, + { + "epoch": 0.64, + "learning_rate": 7.276606573100789e-06, + "loss": 0.8444, + "step": 32500 + }, + { + "epoch": 0.64, + "learning_rate": 7.257032121674791e-06, + "loss": 0.8016, + "step": 32550 + }, + { + "epoch": 0.64, + "learning_rate": 7.237457670248792e-06, + "loss": 0.8408, + "step": 32600 + }, + { + "epoch": 0.64, + "learning_rate": 7.217883218822793e-06, + "loss": 0.8744, + "step": 32650 + }, + { + "epoch": 0.64, + "learning_rate": 7.198308767396795e-06, + "loss": 0.8412, + "step": 32700 + }, + { + "epoch": 0.64, + "learning_rate": 7.178734315970796e-06, + "loss": 0.8446, + "step": 32750 + }, + { + "epoch": 0.64, + "learning_rate": 7.1591598645447964e-06, + "loss": 0.8469, + "step": 32800 + }, + { + "epoch": 0.64, + "learning_rate": 7.139585413118798e-06, + "loss": 0.7875, + "step": 32850 + }, + { + "epoch": 0.64, + "learning_rate": 7.120010961692799e-06, + "loss": 0.8713, + "step": 32900 + }, + { + "epoch": 0.64, + "learning_rate": 7.1004365102668005e-06, + "loss": 0.8614, + "step": 32950 + }, + { + "epoch": 0.65, + "learning_rate": 7.080862058840802e-06, + "loss": 0.8067, + "step": 33000 + }, + { + "epoch": 0.65, + "learning_rate": 7.061287607414803e-06, + "loss": 0.8323, + "step": 33050 + }, + { + "epoch": 0.65, + "learning_rate": 7.0417131559888045e-06, + "loss": 0.8127, + "step": 33100 + }, + { + "epoch": 0.65, + "learning_rate": 7.022138704562805e-06, + "loss": 0.9393, + "step": 33150 + }, + { + "epoch": 0.65, + "learning_rate": 7.002564253136806e-06, + "loss": 0.8109, + "step": 33200 + }, + { + "epoch": 0.65, + "learning_rate": 6.982989801710808e-06, + "loss": 0.871, + "step": 33250 + }, + { + "epoch": 0.65, + "learning_rate": 6.963415350284809e-06, + "loss": 0.7685, + "step": 33300 + }, + { + "epoch": 0.65, + "learning_rate": 6.94384089885881e-06, + "loss": 0.8988, + "step": 33350 + }, + { + "epoch": 0.65, + "learning_rate": 6.924266447432812e-06, + "loss": 0.8079, + "step": 33400 + }, + { + "epoch": 0.65, + "learning_rate": 6.904691996006813e-06, + "loss": 0.8664, + "step": 33450 + }, + { + "epoch": 0.66, + "learning_rate": 6.8851175445808135e-06, + "loss": 0.8659, + "step": 33500 + }, + { + "epoch": 0.66, + "learning_rate": 6.865543093154815e-06, + "loss": 0.8087, + "step": 33550 + }, + { + "epoch": 0.66, + "learning_rate": 6.845968641728816e-06, + "loss": 0.9701, + "step": 33600 + }, + { + "epoch": 0.66, + "learning_rate": 6.8263941903028175e-06, + "loss": 0.8431, + "step": 33650 + }, + { + "epoch": 0.66, + "learning_rate": 6.806819738876819e-06, + "loss": 0.8664, + "step": 33700 + }, + { + "epoch": 0.66, + "learning_rate": 6.78724528745082e-06, + "loss": 0.8538, + "step": 33750 + }, + { + "epoch": 0.66, + "learning_rate": 6.767670836024821e-06, + "loss": 0.9135, + "step": 33800 + }, + { + "epoch": 0.66, + "learning_rate": 6.748096384598822e-06, + "loss": 0.8, + "step": 33850 + }, + { + "epoch": 0.66, + "learning_rate": 6.728521933172823e-06, + "loss": 0.8769, + "step": 33900 + }, + { + "epoch": 0.66, + "learning_rate": 6.708947481746825e-06, + "loss": 0.8763, + "step": 33950 + }, + { + "epoch": 0.67, + "learning_rate": 6.689373030320826e-06, + "loss": 0.8174, + "step": 34000 + }, + { + "epoch": 0.67, + "learning_rate": 6.669798578894827e-06, + "loss": 0.7703, + "step": 34050 + }, + { + "epoch": 0.67, + "learning_rate": 6.650224127468829e-06, + "loss": 0.9214, + "step": 34100 + }, + { + "epoch": 0.67, + "learning_rate": 6.630649676042829e-06, + "loss": 0.8517, + "step": 34150 + }, + { + "epoch": 0.67, + "learning_rate": 6.611075224616831e-06, + "loss": 0.8129, + "step": 34200 + }, + { + "epoch": 0.67, + "learning_rate": 6.591500773190832e-06, + "loss": 0.8221, + "step": 34250 + }, + { + "epoch": 0.67, + "learning_rate": 6.571926321764833e-06, + "loss": 0.8089, + "step": 34300 + }, + { + "epoch": 0.67, + "learning_rate": 6.552351870338835e-06, + "loss": 0.9105, + "step": 34350 + }, + { + "epoch": 0.67, + "learning_rate": 6.532777418912836e-06, + "loss": 0.7871, + "step": 34400 + }, + { + "epoch": 0.67, + "learning_rate": 6.513202967486837e-06, + "loss": 0.7993, + "step": 34450 + }, + { + "epoch": 0.68, + "learning_rate": 6.493628516060838e-06, + "loss": 0.7592, + "step": 34500 + }, + { + "epoch": 0.68, + "learning_rate": 6.474054064634839e-06, + "loss": 0.8226, + "step": 34550 + }, + { + "epoch": 0.68, + "learning_rate": 6.4544796132088405e-06, + "loss": 0.8362, + "step": 34600 + }, + { + "epoch": 0.68, + "learning_rate": 6.434905161782842e-06, + "loss": 0.8218, + "step": 34650 + }, + { + "epoch": 0.68, + "learning_rate": 6.415330710356843e-06, + "loss": 0.7943, + "step": 34700 + }, + { + "epoch": 0.68, + "learning_rate": 6.3957562589308445e-06, + "loss": 0.9096, + "step": 34750 + }, + { + "epoch": 0.68, + "learning_rate": 6.376181807504845e-06, + "loss": 0.8132, + "step": 34800 + }, + { + "epoch": 0.68, + "learning_rate": 6.356607356078846e-06, + "loss": 0.8449, + "step": 34850 + }, + { + "epoch": 0.68, + "learning_rate": 6.337032904652848e-06, + "loss": 0.8221, + "step": 34900 + }, + { + "epoch": 0.68, + "learning_rate": 6.317458453226849e-06, + "loss": 0.8582, + "step": 34950 + }, + { + "epoch": 0.69, + "learning_rate": 6.29788400180085e-06, + "loss": 0.869, + "step": 35000 + }, + { + "epoch": 0.69, + "learning_rate": 6.278309550374852e-06, + "loss": 0.8262, + "step": 35050 + }, + { + "epoch": 0.69, + "learning_rate": 6.258735098948853e-06, + "loss": 0.8728, + "step": 35100 + }, + { + "epoch": 0.69, + "learning_rate": 6.2391606475228535e-06, + "loss": 0.9178, + "step": 35150 + }, + { + "epoch": 0.69, + "learning_rate": 6.219586196096855e-06, + "loss": 0.7946, + "step": 35200 + }, + { + "epoch": 0.69, + "learning_rate": 6.200011744670856e-06, + "loss": 0.8229, + "step": 35250 + }, + { + "epoch": 0.69, + "learning_rate": 6.1804372932448575e-06, + "loss": 0.8999, + "step": 35300 + }, + { + "epoch": 0.69, + "learning_rate": 6.160862841818859e-06, + "loss": 0.9206, + "step": 35350 + }, + { + "epoch": 0.69, + "learning_rate": 6.14128839039286e-06, + "loss": 0.8875, + "step": 35400 + }, + { + "epoch": 0.69, + "learning_rate": 6.1217139389668616e-06, + "loss": 0.8835, + "step": 35450 + }, + { + "epoch": 0.69, + "learning_rate": 6.102139487540862e-06, + "loss": 0.8897, + "step": 35500 + }, + { + "epoch": 0.7, + "learning_rate": 6.082565036114863e-06, + "loss": 0.9601, + "step": 35550 + }, + { + "epoch": 0.7, + "learning_rate": 6.062990584688865e-06, + "loss": 0.7805, + "step": 35600 + }, + { + "epoch": 0.7, + "learning_rate": 6.043416133262866e-06, + "loss": 0.9185, + "step": 35650 + }, + { + "epoch": 0.7, + "learning_rate": 6.0238416818368674e-06, + "loss": 0.7948, + "step": 35700 + }, + { + "epoch": 0.7, + "learning_rate": 6.004267230410869e-06, + "loss": 0.8508, + "step": 35750 + }, + { + "epoch": 0.7, + "learning_rate": 5.984692778984869e-06, + "loss": 0.8683, + "step": 35800 + }, + { + "epoch": 0.7, + "learning_rate": 5.965118327558871e-06, + "loss": 0.889, + "step": 35850 + }, + { + "epoch": 0.7, + "learning_rate": 5.945543876132872e-06, + "loss": 0.8705, + "step": 35900 + }, + { + "epoch": 0.7, + "learning_rate": 5.925969424706873e-06, + "loss": 0.8509, + "step": 35950 + }, + { + "epoch": 0.7, + "learning_rate": 5.906394973280875e-06, + "loss": 0.8239, + "step": 36000 + }, + { + "epoch": 0.71, + "learning_rate": 5.886820521854876e-06, + "loss": 0.817, + "step": 36050 + }, + { + "epoch": 0.71, + "learning_rate": 5.867246070428877e-06, + "loss": 0.8169, + "step": 36100 + }, + { + "epoch": 0.71, + "learning_rate": 5.847671619002878e-06, + "loss": 0.8845, + "step": 36150 + }, + { + "epoch": 0.71, + "learning_rate": 5.828097167576879e-06, + "loss": 0.7943, + "step": 36200 + }, + { + "epoch": 0.71, + "learning_rate": 5.8085227161508805e-06, + "loss": 0.8947, + "step": 36250 + }, + { + "epoch": 0.71, + "learning_rate": 5.788948264724882e-06, + "loss": 0.9, + "step": 36300 + }, + { + "epoch": 0.71, + "learning_rate": 5.769373813298883e-06, + "loss": 0.8847, + "step": 36350 + }, + { + "epoch": 0.71, + "learning_rate": 5.7497993618728845e-06, + "loss": 0.868, + "step": 36400 + }, + { + "epoch": 0.71, + "learning_rate": 5.730224910446886e-06, + "loss": 0.7603, + "step": 36450 + }, + { + "epoch": 0.71, + "learning_rate": 5.710650459020886e-06, + "loss": 0.8632, + "step": 36500 + }, + { + "epoch": 0.72, + "learning_rate": 5.691076007594888e-06, + "loss": 0.8426, + "step": 36550 + }, + { + "epoch": 0.72, + "learning_rate": 5.671501556168889e-06, + "loss": 0.939, + "step": 36600 + }, + { + "epoch": 0.72, + "learning_rate": 5.65192710474289e-06, + "loss": 0.8623, + "step": 36650 + }, + { + "epoch": 0.72, + "learning_rate": 5.632352653316892e-06, + "loss": 0.8796, + "step": 36700 + }, + { + "epoch": 0.72, + "learning_rate": 5.612778201890893e-06, + "loss": 0.9162, + "step": 36750 + }, + { + "epoch": 0.72, + "learning_rate": 5.5932037504648935e-06, + "loss": 0.7553, + "step": 36800 + }, + { + "epoch": 0.72, + "learning_rate": 5.573629299038895e-06, + "loss": 0.8613, + "step": 36850 + }, + { + "epoch": 0.72, + "learning_rate": 5.554054847612896e-06, + "loss": 0.9001, + "step": 36900 + }, + { + "epoch": 0.72, + "learning_rate": 5.5344803961868976e-06, + "loss": 0.9094, + "step": 36950 + }, + { + "epoch": 0.72, + "learning_rate": 5.514905944760899e-06, + "loss": 0.8644, + "step": 37000 + }, + { + "epoch": 0.73, + "learning_rate": 5.4953314933349e-06, + "loss": 0.8831, + "step": 37050 + }, + { + "epoch": 0.73, + "learning_rate": 5.4757570419089016e-06, + "loss": 0.8597, + "step": 37100 + }, + { + "epoch": 0.73, + "learning_rate": 5.456182590482902e-06, + "loss": 0.7675, + "step": 37150 + }, + { + "epoch": 0.73, + "learning_rate": 5.436608139056903e-06, + "loss": 0.8921, + "step": 37200 + }, + { + "epoch": 0.73, + "learning_rate": 5.417033687630905e-06, + "loss": 0.8159, + "step": 37250 + }, + { + "epoch": 0.73, + "learning_rate": 5.397459236204906e-06, + "loss": 0.8174, + "step": 37300 + }, + { + "epoch": 0.73, + "learning_rate": 5.3778847847789074e-06, + "loss": 0.8421, + "step": 37350 + }, + { + "epoch": 0.73, + "learning_rate": 5.358310333352909e-06, + "loss": 0.8831, + "step": 37400 + }, + { + "epoch": 0.73, + "learning_rate": 5.33873588192691e-06, + "loss": 0.9144, + "step": 37450 + }, + { + "epoch": 0.73, + "learning_rate": 5.319161430500911e-06, + "loss": 0.7922, + "step": 37500 + }, + { + "epoch": 0.74, + "learning_rate": 5.299586979074912e-06, + "loss": 0.8456, + "step": 37550 + }, + { + "epoch": 0.74, + "learning_rate": 5.280012527648913e-06, + "loss": 0.8568, + "step": 37600 + }, + { + "epoch": 0.74, + "learning_rate": 5.260438076222915e-06, + "loss": 0.8747, + "step": 37650 + }, + { + "epoch": 0.74, + "learning_rate": 5.240863624796916e-06, + "loss": 0.8253, + "step": 37700 + }, + { + "epoch": 0.74, + "learning_rate": 5.221289173370917e-06, + "loss": 0.9293, + "step": 37750 + }, + { + "epoch": 0.74, + "learning_rate": 5.201714721944918e-06, + "loss": 0.7685, + "step": 37800 + }, + { + "epoch": 0.74, + "learning_rate": 5.182140270518919e-06, + "loss": 0.8248, + "step": 37850 + }, + { + "epoch": 0.74, + "learning_rate": 5.1625658190929205e-06, + "loss": 0.828, + "step": 37900 + }, + { + "epoch": 0.74, + "learning_rate": 5.142991367666922e-06, + "loss": 0.8755, + "step": 37950 + }, + { + "epoch": 0.74, + "learning_rate": 5.123416916240923e-06, + "loss": 0.7804, + "step": 38000 + }, + { + "epoch": 0.74, + "learning_rate": 5.1038424648149245e-06, + "loss": 0.8399, + "step": 38050 + }, + { + "epoch": 0.75, + "learning_rate": 5.084268013388926e-06, + "loss": 0.936, + "step": 38100 + }, + { + "epoch": 0.75, + "learning_rate": 5.064693561962926e-06, + "loss": 0.8735, + "step": 38150 + }, + { + "epoch": 0.75, + "learning_rate": 5.045119110536928e-06, + "loss": 0.7677, + "step": 38200 + }, + { + "epoch": 0.75, + "learning_rate": 5.025544659110929e-06, + "loss": 0.8416, + "step": 38250 + }, + { + "epoch": 0.75, + "learning_rate": 5.00597020768493e-06, + "loss": 0.7239, + "step": 38300 + }, + { + "epoch": 0.75, + "learning_rate": 4.986395756258931e-06, + "loss": 0.8729, + "step": 38350 + }, + { + "epoch": 0.75, + "learning_rate": 4.966821304832932e-06, + "loss": 0.8779, + "step": 38400 + }, + { + "epoch": 0.75, + "learning_rate": 4.9472468534069335e-06, + "loss": 0.9235, + "step": 38450 + }, + { + "epoch": 0.75, + "learning_rate": 4.927672401980935e-06, + "loss": 0.8315, + "step": 38500 + }, + { + "epoch": 0.75, + "learning_rate": 4.908097950554936e-06, + "loss": 0.7789, + "step": 38550 + }, + { + "epoch": 0.76, + "learning_rate": 4.888523499128937e-06, + "loss": 0.8934, + "step": 38600 + }, + { + "epoch": 0.76, + "learning_rate": 4.868949047702938e-06, + "loss": 0.8634, + "step": 38650 + }, + { + "epoch": 0.76, + "learning_rate": 4.849374596276939e-06, + "loss": 0.7719, + "step": 38700 + }, + { + "epoch": 0.76, + "learning_rate": 4.829800144850941e-06, + "loss": 0.8202, + "step": 38750 + }, + { + "epoch": 0.76, + "learning_rate": 4.810225693424942e-06, + "loss": 0.8192, + "step": 38800 + }, + { + "epoch": 0.76, + "learning_rate": 4.790651241998943e-06, + "loss": 0.8286, + "step": 38850 + }, + { + "epoch": 0.76, + "learning_rate": 4.771076790572945e-06, + "loss": 0.8023, + "step": 38900 + }, + { + "epoch": 0.76, + "learning_rate": 4.751502339146945e-06, + "loss": 0.8284, + "step": 38950 + }, + { + "epoch": 0.76, + "learning_rate": 4.731927887720947e-06, + "loss": 0.7514, + "step": 39000 + }, + { + "epoch": 0.76, + "learning_rate": 4.712353436294948e-06, + "loss": 0.8086, + "step": 39050 + }, + { + "epoch": 0.77, + "learning_rate": 4.692778984868949e-06, + "loss": 0.8498, + "step": 39100 + }, + { + "epoch": 0.77, + "learning_rate": 4.673204533442951e-06, + "loss": 0.8595, + "step": 39150 + }, + { + "epoch": 0.77, + "learning_rate": 4.653630082016952e-06, + "loss": 0.7855, + "step": 39200 + }, + { + "epoch": 0.77, + "learning_rate": 4.6340556305909524e-06, + "loss": 0.85, + "step": 39250 + }, + { + "epoch": 0.77, + "learning_rate": 4.614481179164954e-06, + "loss": 0.9688, + "step": 39300 + }, + { + "epoch": 0.77, + "learning_rate": 4.594906727738955e-06, + "loss": 0.7548, + "step": 39350 + }, + { + "epoch": 0.77, + "learning_rate": 4.5753322763129565e-06, + "loss": 0.8934, + "step": 39400 + }, + { + "epoch": 0.77, + "learning_rate": 4.555757824886958e-06, + "loss": 0.8086, + "step": 39450 + }, + { + "epoch": 0.77, + "learning_rate": 4.536183373460959e-06, + "loss": 0.8638, + "step": 39500 + }, + { + "epoch": 0.77, + "learning_rate": 4.5166089220349605e-06, + "loss": 0.8957, + "step": 39550 + }, + { + "epoch": 0.78, + "learning_rate": 4.497034470608961e-06, + "loss": 0.935, + "step": 39600 + }, + { + "epoch": 0.78, + "learning_rate": 4.477460019182962e-06, + "loss": 0.8048, + "step": 39650 + }, + { + "epoch": 0.78, + "learning_rate": 4.457885567756964e-06, + "loss": 0.8327, + "step": 39700 + }, + { + "epoch": 0.78, + "learning_rate": 4.438311116330965e-06, + "loss": 0.7151, + "step": 39750 + }, + { + "epoch": 0.78, + "learning_rate": 4.418736664904966e-06, + "loss": 0.8875, + "step": 39800 + }, + { + "epoch": 0.78, + "learning_rate": 4.399162213478968e-06, + "loss": 0.8725, + "step": 39850 + }, + { + "epoch": 0.78, + "learning_rate": 4.379587762052969e-06, + "loss": 0.9833, + "step": 39900 + }, + { + "epoch": 0.78, + "learning_rate": 4.3600133106269695e-06, + "loss": 0.8513, + "step": 39950 + }, + { + "epoch": 0.78, + "learning_rate": 4.340438859200971e-06, + "loss": 0.8536, + "step": 40000 + }, + { + "epoch": 0.78, + "learning_rate": 4.320864407774972e-06, + "loss": 0.8258, + "step": 40050 + }, + { + "epoch": 0.78, + "learning_rate": 4.3012899563489735e-06, + "loss": 0.822, + "step": 40100 + }, + { + "epoch": 0.79, + "learning_rate": 4.281715504922975e-06, + "loss": 0.9229, + "step": 40150 + }, + { + "epoch": 0.79, + "learning_rate": 4.262141053496976e-06, + "loss": 0.8064, + "step": 40200 + }, + { + "epoch": 0.79, + "learning_rate": 4.2425666020709776e-06, + "loss": 0.8638, + "step": 40250 + }, + { + "epoch": 0.79, + "learning_rate": 4.222992150644978e-06, + "loss": 0.8485, + "step": 40300 + }, + { + "epoch": 0.79, + "learning_rate": 4.203417699218979e-06, + "loss": 0.874, + "step": 40350 + }, + { + "epoch": 0.79, + "learning_rate": 4.183843247792981e-06, + "loss": 0.8052, + "step": 40400 + }, + { + "epoch": 0.79, + "learning_rate": 4.164268796366982e-06, + "loss": 0.8778, + "step": 40450 + }, + { + "epoch": 0.79, + "learning_rate": 4.1446943449409834e-06, + "loss": 0.8941, + "step": 40500 + }, + { + "epoch": 0.79, + "learning_rate": 4.125119893514985e-06, + "loss": 0.8616, + "step": 40550 + }, + { + "epoch": 0.79, + "learning_rate": 4.105545442088985e-06, + "loss": 0.8288, + "step": 40600 + }, + { + "epoch": 0.8, + "learning_rate": 4.085970990662987e-06, + "loss": 0.8596, + "step": 40650 + }, + { + "epoch": 0.8, + "learning_rate": 4.066396539236988e-06, + "loss": 0.7813, + "step": 40700 + }, + { + "epoch": 0.8, + "learning_rate": 4.046822087810989e-06, + "loss": 0.8639, + "step": 40750 + }, + { + "epoch": 0.8, + "learning_rate": 4.027247636384991e-06, + "loss": 0.7813, + "step": 40800 + }, + { + "epoch": 0.8, + "learning_rate": 4.007673184958992e-06, + "loss": 0.8996, + "step": 40850 + }, + { + "epoch": 0.8, + "learning_rate": 3.988098733532993e-06, + "loss": 0.8716, + "step": 40900 + }, + { + "epoch": 0.8, + "learning_rate": 3.968524282106994e-06, + "loss": 0.7635, + "step": 40950 + }, + { + "epoch": 0.8, + "learning_rate": 3.948949830680995e-06, + "loss": 0.9362, + "step": 41000 + }, + { + "epoch": 0.8, + "learning_rate": 3.9293753792549965e-06, + "loss": 0.8802, + "step": 41050 + }, + { + "epoch": 0.8, + "learning_rate": 3.909800927828998e-06, + "loss": 0.8532, + "step": 41100 + }, + { + "epoch": 0.81, + "learning_rate": 3.890226476402999e-06, + "loss": 0.9293, + "step": 41150 + }, + { + "epoch": 0.81, + "learning_rate": 3.8706520249770005e-06, + "loss": 0.8303, + "step": 41200 + }, + { + "epoch": 0.81, + "learning_rate": 3.851077573551002e-06, + "loss": 0.8075, + "step": 41250 + }, + { + "epoch": 0.81, + "learning_rate": 3.831503122125002e-06, + "loss": 0.8403, + "step": 41300 + }, + { + "epoch": 0.81, + "learning_rate": 3.8119286706990037e-06, + "loss": 0.7709, + "step": 41350 + }, + { + "epoch": 0.81, + "learning_rate": 3.792354219273005e-06, + "loss": 0.8533, + "step": 41400 + }, + { + "epoch": 0.81, + "learning_rate": 3.7727797678470064e-06, + "loss": 0.8405, + "step": 41450 + }, + { + "epoch": 0.81, + "learning_rate": 3.7532053164210077e-06, + "loss": 0.9128, + "step": 41500 + }, + { + "epoch": 0.81, + "learning_rate": 3.7336308649950086e-06, + "loss": 0.8026, + "step": 41550 + }, + { + "epoch": 0.81, + "learning_rate": 3.71405641356901e-06, + "loss": 0.9514, + "step": 41600 + }, + { + "epoch": 0.82, + "learning_rate": 3.6944819621430113e-06, + "loss": 0.7858, + "step": 41650 + }, + { + "epoch": 0.82, + "learning_rate": 3.6749075107170122e-06, + "loss": 0.8624, + "step": 41700 + }, + { + "epoch": 0.82, + "learning_rate": 3.6553330592910136e-06, + "loss": 0.8648, + "step": 41750 + }, + { + "epoch": 0.82, + "learning_rate": 3.635758607865015e-06, + "loss": 0.8852, + "step": 41800 + }, + { + "epoch": 0.82, + "learning_rate": 3.616184156439016e-06, + "loss": 0.8459, + "step": 41850 + }, + { + "epoch": 0.82, + "learning_rate": 3.596609705013017e-06, + "loss": 0.8714, + "step": 41900 + }, + { + "epoch": 0.82, + "learning_rate": 3.5770352535870185e-06, + "loss": 0.939, + "step": 41950 + }, + { + "epoch": 0.82, + "learning_rate": 3.55746080216102e-06, + "loss": 0.8739, + "step": 42000 + }, + { + "epoch": 0.82, + "learning_rate": 3.5378863507350208e-06, + "loss": 0.8866, + "step": 42050 + }, + { + "epoch": 0.82, + "learning_rate": 3.518311899309022e-06, + "loss": 0.8766, + "step": 42100 + }, + { + "epoch": 0.83, + "learning_rate": 3.4987374478830234e-06, + "loss": 0.9094, + "step": 42150 + }, + { + "epoch": 0.83, + "learning_rate": 3.4791629964570244e-06, + "loss": 0.8963, + "step": 42200 + }, + { + "epoch": 0.83, + "learning_rate": 3.4595885450310257e-06, + "loss": 0.7177, + "step": 42250 + }, + { + "epoch": 0.83, + "learning_rate": 3.440014093605027e-06, + "loss": 0.8551, + "step": 42300 + }, + { + "epoch": 0.83, + "learning_rate": 3.420439642179028e-06, + "loss": 0.8674, + "step": 42350 + }, + { + "epoch": 0.83, + "learning_rate": 3.4008651907530293e-06, + "loss": 0.8375, + "step": 42400 + }, + { + "epoch": 0.83, + "learning_rate": 3.3812907393270306e-06, + "loss": 0.8921, + "step": 42450 + }, + { + "epoch": 0.83, + "learning_rate": 3.361716287901032e-06, + "loss": 0.8845, + "step": 42500 + }, + { + "epoch": 0.83, + "learning_rate": 3.342141836475033e-06, + "loss": 0.8361, + "step": 42550 + }, + { + "epoch": 0.83, + "learning_rate": 3.3225673850490342e-06, + "loss": 0.8161, + "step": 42600 + }, + { + "epoch": 0.83, + "learning_rate": 3.3029929336230356e-06, + "loss": 0.8561, + "step": 42650 + }, + { + "epoch": 0.84, + "learning_rate": 3.2834184821970365e-06, + "loss": 0.8798, + "step": 42700 + }, + { + "epoch": 0.84, + "learning_rate": 3.263844030771038e-06, + "loss": 0.8713, + "step": 42750 + }, + { + "epoch": 0.84, + "learning_rate": 3.244269579345039e-06, + "loss": 0.8094, + "step": 42800 + }, + { + "epoch": 0.84, + "learning_rate": 3.22469512791904e-06, + "loss": 0.8569, + "step": 42850 + }, + { + "epoch": 0.84, + "learning_rate": 3.2051206764930414e-06, + "loss": 0.7974, + "step": 42900 + }, + { + "epoch": 0.84, + "learning_rate": 3.1855462250670428e-06, + "loss": 0.7707, + "step": 42950 + }, + { + "epoch": 0.84, + "learning_rate": 3.165971773641044e-06, + "loss": 0.8129, + "step": 43000 + }, + { + "epoch": 0.84, + "learning_rate": 3.146397322215045e-06, + "loss": 0.8381, + "step": 43050 + }, + { + "epoch": 0.84, + "learning_rate": 3.1268228707890464e-06, + "loss": 0.9326, + "step": 43100 + }, + { + "epoch": 0.84, + "learning_rate": 3.1072484193630477e-06, + "loss": 0.9263, + "step": 43150 + }, + { + "epoch": 0.85, + "learning_rate": 3.0876739679370486e-06, + "loss": 0.8431, + "step": 43200 + }, + { + "epoch": 0.85, + "learning_rate": 3.06809951651105e-06, + "loss": 0.7909, + "step": 43250 + }, + { + "epoch": 0.85, + "learning_rate": 3.0485250650850513e-06, + "loss": 0.832, + "step": 43300 + }, + { + "epoch": 0.85, + "learning_rate": 3.0289506136590522e-06, + "loss": 0.9167, + "step": 43350 + }, + { + "epoch": 0.85, + "learning_rate": 3.0093761622330536e-06, + "loss": 0.9032, + "step": 43400 + }, + { + "epoch": 0.85, + "learning_rate": 2.989801710807055e-06, + "loss": 0.8083, + "step": 43450 + }, + { + "epoch": 0.85, + "learning_rate": 2.9702272593810562e-06, + "loss": 0.8541, + "step": 43500 + }, + { + "epoch": 0.85, + "learning_rate": 2.950652807955057e-06, + "loss": 0.787, + "step": 43550 + }, + { + "epoch": 0.85, + "learning_rate": 2.9310783565290585e-06, + "loss": 0.879, + "step": 43600 + }, + { + "epoch": 0.85, + "learning_rate": 2.91150390510306e-06, + "loss": 0.8119, + "step": 43650 + }, + { + "epoch": 0.86, + "learning_rate": 2.8919294536770608e-06, + "loss": 0.86, + "step": 43700 + }, + { + "epoch": 0.86, + "learning_rate": 2.872355002251062e-06, + "loss": 0.8162, + "step": 43750 + }, + { + "epoch": 0.86, + "learning_rate": 2.8527805508250634e-06, + "loss": 0.8034, + "step": 43800 + }, + { + "epoch": 0.86, + "learning_rate": 2.8332060993990644e-06, + "loss": 0.7883, + "step": 43850 + }, + { + "epoch": 0.86, + "learning_rate": 2.8136316479730657e-06, + "loss": 0.8927, + "step": 43900 + }, + { + "epoch": 0.86, + "learning_rate": 2.794057196547067e-06, + "loss": 0.8585, + "step": 43950 + }, + { + "epoch": 0.86, + "learning_rate": 2.7744827451210684e-06, + "loss": 0.8768, + "step": 44000 + }, + { + "epoch": 0.86, + "learning_rate": 2.7549082936950693e-06, + "loss": 0.826, + "step": 44050 + }, + { + "epoch": 0.86, + "learning_rate": 2.7353338422690706e-06, + "loss": 0.8907, + "step": 44100 + }, + { + "epoch": 0.86, + "learning_rate": 2.715759390843072e-06, + "loss": 0.8869, + "step": 44150 + }, + { + "epoch": 0.87, + "learning_rate": 2.696184939417073e-06, + "loss": 0.8397, + "step": 44200 + }, + { + "epoch": 0.87, + "learning_rate": 2.6766104879910742e-06, + "loss": 0.9316, + "step": 44250 + }, + { + "epoch": 0.87, + "learning_rate": 2.6570360365650756e-06, + "loss": 0.8169, + "step": 44300 + }, + { + "epoch": 0.87, + "learning_rate": 2.6374615851390765e-06, + "loss": 0.841, + "step": 44350 + }, + { + "epoch": 0.87, + "learning_rate": 2.617887133713078e-06, + "loss": 0.8966, + "step": 44400 + }, + { + "epoch": 0.87, + "learning_rate": 2.598312682287079e-06, + "loss": 0.8448, + "step": 44450 + }, + { + "epoch": 0.87, + "learning_rate": 2.5787382308610805e-06, + "loss": 0.7451, + "step": 44500 + }, + { + "epoch": 0.87, + "learning_rate": 2.5591637794350814e-06, + "loss": 0.921, + "step": 44550 + }, + { + "epoch": 0.87, + "learning_rate": 2.5395893280090828e-06, + "loss": 0.8679, + "step": 44600 + }, + { + "epoch": 0.87, + "learning_rate": 2.520014876583084e-06, + "loss": 0.94, + "step": 44650 + }, + { + "epoch": 0.87, + "learning_rate": 2.500440425157085e-06, + "loss": 0.7933, + "step": 44700 + }, + { + "epoch": 0.88, + "learning_rate": 2.4808659737310864e-06, + "loss": 0.8309, + "step": 44750 + }, + { + "epoch": 0.88, + "learning_rate": 2.4612915223050877e-06, + "loss": 0.8401, + "step": 44800 + }, + { + "epoch": 0.88, + "learning_rate": 2.4417170708790886e-06, + "loss": 0.8334, + "step": 44850 + }, + { + "epoch": 0.88, + "learning_rate": 2.42214261945309e-06, + "loss": 0.885, + "step": 44900 + }, + { + "epoch": 0.88, + "learning_rate": 2.4025681680270913e-06, + "loss": 0.8044, + "step": 44950 + }, + { + "epoch": 0.88, + "learning_rate": 2.3829937166010927e-06, + "loss": 0.8739, + "step": 45000 + }, + { + "epoch": 0.88, + "learning_rate": 2.3634192651750936e-06, + "loss": 0.8277, + "step": 45050 + }, + { + "epoch": 0.88, + "learning_rate": 2.343844813749095e-06, + "loss": 0.7745, + "step": 45100 + }, + { + "epoch": 0.88, + "learning_rate": 2.3242703623230963e-06, + "loss": 0.8381, + "step": 45150 + }, + { + "epoch": 0.88, + "learning_rate": 2.304695910897097e-06, + "loss": 0.8805, + "step": 45200 + }, + { + "epoch": 0.89, + "learning_rate": 2.2851214594710985e-06, + "loss": 0.8687, + "step": 45250 + }, + { + "epoch": 0.89, + "learning_rate": 2.2655470080451e-06, + "loss": 0.986, + "step": 45300 + }, + { + "epoch": 0.89, + "learning_rate": 2.2459725566191008e-06, + "loss": 0.8102, + "step": 45350 + }, + { + "epoch": 0.89, + "learning_rate": 2.226398105193102e-06, + "loss": 0.8374, + "step": 45400 + }, + { + "epoch": 0.89, + "learning_rate": 2.2068236537671035e-06, + "loss": 0.8183, + "step": 45450 + }, + { + "epoch": 0.89, + "learning_rate": 2.187249202341105e-06, + "loss": 0.7379, + "step": 45500 + }, + { + "epoch": 0.89, + "learning_rate": 2.1676747509151057e-06, + "loss": 0.8932, + "step": 45550 + }, + { + "epoch": 0.89, + "learning_rate": 2.148100299489107e-06, + "loss": 0.8039, + "step": 45600 + }, + { + "epoch": 0.89, + "learning_rate": 2.1285258480631084e-06, + "loss": 0.8697, + "step": 45650 + }, + { + "epoch": 0.89, + "learning_rate": 2.1089513966371093e-06, + "loss": 0.8854, + "step": 45700 + }, + { + "epoch": 0.9, + "learning_rate": 2.0893769452111106e-06, + "loss": 0.7589, + "step": 45750 + }, + { + "epoch": 0.9, + "learning_rate": 2.069802493785112e-06, + "loss": 0.8185, + "step": 45800 + }, + { + "epoch": 0.9, + "learning_rate": 2.050228042359113e-06, + "loss": 0.8476, + "step": 45850 + }, + { + "epoch": 0.9, + "learning_rate": 2.0306535909331142e-06, + "loss": 0.8286, + "step": 45900 + }, + { + "epoch": 0.9, + "learning_rate": 2.0110791395071156e-06, + "loss": 0.8612, + "step": 45950 + }, + { + "epoch": 0.9, + "learning_rate": 1.991504688081117e-06, + "loss": 0.8429, + "step": 46000 + }, + { + "epoch": 0.9, + "learning_rate": 1.971930236655118e-06, + "loss": 0.7646, + "step": 46050 + }, + { + "epoch": 0.9, + "learning_rate": 1.952355785229119e-06, + "loss": 0.8977, + "step": 46100 + }, + { + "epoch": 0.9, + "learning_rate": 1.9327813338031205e-06, + "loss": 0.8532, + "step": 46150 + }, + { + "epoch": 0.9, + "learning_rate": 1.9132068823771214e-06, + "loss": 0.7703, + "step": 46200 + }, + { + "epoch": 0.91, + "learning_rate": 1.8936324309511228e-06, + "loss": 0.8496, + "step": 46250 + }, + { + "epoch": 0.91, + "learning_rate": 1.8740579795251241e-06, + "loss": 0.7815, + "step": 46300 + }, + { + "epoch": 0.91, + "learning_rate": 1.8544835280991253e-06, + "loss": 0.8324, + "step": 46350 + }, + { + "epoch": 0.91, + "learning_rate": 1.8349090766731264e-06, + "loss": 0.793, + "step": 46400 + }, + { + "epoch": 0.91, + "learning_rate": 1.8153346252471277e-06, + "loss": 0.852, + "step": 46450 + }, + { + "epoch": 0.91, + "learning_rate": 1.7957601738211289e-06, + "loss": 0.8608, + "step": 46500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7761857223951302e-06, + "loss": 0.8912, + "step": 46550 + }, + { + "epoch": 0.91, + "learning_rate": 1.7566112709691313e-06, + "loss": 0.821, + "step": 46600 + }, + { + "epoch": 0.91, + "learning_rate": 1.7370368195431325e-06, + "loss": 1.0388, + "step": 46650 + }, + { + "epoch": 0.91, + "learning_rate": 1.7174623681171338e-06, + "loss": 0.91, + "step": 46700 + }, + { + "epoch": 0.92, + "learning_rate": 1.697887916691135e-06, + "loss": 0.8276, + "step": 46750 + }, + { + "epoch": 0.92, + "learning_rate": 1.6783134652651363e-06, + "loss": 0.8285, + "step": 46800 + }, + { + "epoch": 0.92, + "learning_rate": 1.6587390138391374e-06, + "loss": 0.8882, + "step": 46850 + }, + { + "epoch": 0.92, + "learning_rate": 1.6391645624131385e-06, + "loss": 0.8399, + "step": 46900 + }, + { + "epoch": 0.92, + "learning_rate": 1.6195901109871399e-06, + "loss": 0.8796, + "step": 46950 + }, + { + "epoch": 0.92, + "learning_rate": 1.600015659561141e-06, + "loss": 0.8012, + "step": 47000 + }, + { + "epoch": 0.92, + "learning_rate": 1.5804412081351423e-06, + "loss": 0.8199, + "step": 47050 + }, + { + "epoch": 0.92, + "learning_rate": 1.5608667567091435e-06, + "loss": 0.8194, + "step": 47100 + }, + { + "epoch": 0.92, + "learning_rate": 1.5412923052831446e-06, + "loss": 0.8026, + "step": 47150 + }, + { + "epoch": 0.92, + "learning_rate": 1.521717853857146e-06, + "loss": 0.9046, + "step": 47200 + }, + { + "epoch": 0.92, + "learning_rate": 1.502143402431147e-06, + "loss": 0.8155, + "step": 47250 + }, + { + "epoch": 0.93, + "learning_rate": 1.4825689510051484e-06, + "loss": 0.8619, + "step": 47300 + }, + { + "epoch": 0.93, + "learning_rate": 1.4629944995791495e-06, + "loss": 0.8269, + "step": 47350 + }, + { + "epoch": 0.93, + "learning_rate": 1.4434200481531507e-06, + "loss": 0.8113, + "step": 47400 + }, + { + "epoch": 0.93, + "learning_rate": 1.423845596727152e-06, + "loss": 0.8725, + "step": 47450 + }, + { + "epoch": 0.93, + "learning_rate": 1.4042711453011531e-06, + "loss": 0.8423, + "step": 47500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3846966938751545e-06, + "loss": 0.8607, + "step": 47550 + }, + { + "epoch": 0.93, + "learning_rate": 1.3651222424491556e-06, + "loss": 0.7522, + "step": 47600 + }, + { + "epoch": 0.93, + "learning_rate": 1.3455477910231567e-06, + "loss": 0.804, + "step": 47650 + }, + { + "epoch": 0.93, + "learning_rate": 1.325973339597158e-06, + "loss": 0.8359, + "step": 47700 + }, + { + "epoch": 0.93, + "learning_rate": 1.3063988881711592e-06, + "loss": 0.8405, + "step": 47750 + }, + { + "epoch": 0.94, + "learning_rate": 1.2868244367451605e-06, + "loss": 0.8168, + "step": 47800 + }, + { + "epoch": 0.94, + "learning_rate": 1.2672499853191617e-06, + "loss": 0.7954, + "step": 47850 + }, + { + "epoch": 0.94, + "learning_rate": 1.2476755338931628e-06, + "loss": 0.8689, + "step": 47900 + }, + { + "epoch": 0.94, + "learning_rate": 1.228101082467164e-06, + "loss": 0.8532, + "step": 47950 + }, + { + "epoch": 0.94, + "learning_rate": 1.2085266310411653e-06, + "loss": 0.846, + "step": 48000 + }, + { + "epoch": 0.94, + "learning_rate": 1.1889521796151664e-06, + "loss": 0.8195, + "step": 48050 + }, + { + "epoch": 0.94, + "learning_rate": 1.1693777281891675e-06, + "loss": 0.8346, + "step": 48100 + }, + { + "epoch": 0.94, + "learning_rate": 1.1498032767631689e-06, + "loss": 0.8103, + "step": 48150 + }, + { + "epoch": 0.94, + "learning_rate": 1.13022882533717e-06, + "loss": 0.9013, + "step": 48200 + }, + { + "epoch": 0.94, + "learning_rate": 1.1106543739111713e-06, + "loss": 0.8704, + "step": 48250 + }, + { + "epoch": 0.95, + "learning_rate": 1.0910799224851725e-06, + "loss": 0.7644, + "step": 48300 + }, + { + "epoch": 0.95, + "learning_rate": 1.0715054710591736e-06, + "loss": 0.8247, + "step": 48350 + }, + { + "epoch": 0.95, + "learning_rate": 1.051931019633175e-06, + "loss": 0.7922, + "step": 48400 + }, + { + "epoch": 0.95, + "learning_rate": 1.032356568207176e-06, + "loss": 0.8569, + "step": 48450 + }, + { + "epoch": 0.95, + "learning_rate": 1.0127821167811774e-06, + "loss": 0.8725, + "step": 48500 + }, + { + "epoch": 0.95, + "learning_rate": 9.932076653551785e-07, + "loss": 0.8302, + "step": 48550 + }, + { + "epoch": 0.95, + "learning_rate": 9.736332139291797e-07, + "loss": 0.8937, + "step": 48600 + }, + { + "epoch": 0.95, + "learning_rate": 9.54058762503181e-07, + "loss": 0.8193, + "step": 48650 + }, + { + "epoch": 0.95, + "learning_rate": 9.344843110771821e-07, + "loss": 0.8036, + "step": 48700 + }, + { + "epoch": 0.95, + "learning_rate": 9.149098596511834e-07, + "loss": 0.7537, + "step": 48750 + }, + { + "epoch": 0.96, + "learning_rate": 8.953354082251846e-07, + "loss": 0.7915, + "step": 48800 + }, + { + "epoch": 0.96, + "learning_rate": 8.757609567991858e-07, + "loss": 0.8179, + "step": 48850 + }, + { + "epoch": 0.96, + "learning_rate": 8.56186505373187e-07, + "loss": 0.7896, + "step": 48900 + }, + { + "epoch": 0.96, + "learning_rate": 8.366120539471882e-07, + "loss": 0.8691, + "step": 48950 + }, + { + "epoch": 0.96, + "learning_rate": 8.170376025211894e-07, + "loss": 0.8881, + "step": 49000 + }, + { + "epoch": 0.96, + "learning_rate": 7.974631510951907e-07, + "loss": 0.8348, + "step": 49050 + }, + { + "epoch": 0.96, + "learning_rate": 7.778886996691919e-07, + "loss": 0.831, + "step": 49100 + }, + { + "epoch": 0.96, + "learning_rate": 7.58314248243193e-07, + "loss": 0.9022, + "step": 49150 + }, + { + "epoch": 0.96, + "learning_rate": 7.387397968171943e-07, + "loss": 0.8517, + "step": 49200 + }, + { + "epoch": 0.96, + "learning_rate": 7.191653453911955e-07, + "loss": 0.8352, + "step": 49250 + }, + { + "epoch": 0.97, + "learning_rate": 6.995908939651967e-07, + "loss": 0.8007, + "step": 49300 + }, + { + "epoch": 0.97, + "learning_rate": 6.80016442539198e-07, + "loss": 0.8099, + "step": 49350 + }, + { + "epoch": 0.97, + "learning_rate": 6.604419911131991e-07, + "loss": 0.8561, + "step": 49400 + }, + { + "epoch": 0.97, + "learning_rate": 6.408675396872003e-07, + "loss": 0.893, + "step": 49450 + }, + { + "epoch": 0.97, + "learning_rate": 6.212930882612015e-07, + "loss": 0.785, + "step": 49500 + }, + { + "epoch": 0.97, + "learning_rate": 6.017186368352027e-07, + "loss": 0.915, + "step": 49550 + }, + { + "epoch": 0.97, + "learning_rate": 5.821441854092039e-07, + "loss": 0.7017, + "step": 49600 + }, + { + "epoch": 0.97, + "learning_rate": 5.625697339832052e-07, + "loss": 0.8437, + "step": 49650 + }, + { + "epoch": 0.97, + "learning_rate": 5.429952825572064e-07, + "loss": 0.8002, + "step": 49700 + }, + { + "epoch": 0.97, + "learning_rate": 5.234208311312075e-07, + "loss": 0.8034, + "step": 49750 + }, + { + "epoch": 0.97, + "learning_rate": 5.038463797052088e-07, + "loss": 0.7989, + "step": 49800 + }, + { + "epoch": 0.98, + "learning_rate": 4.8427192827921e-07, + "loss": 0.8462, + "step": 49850 + }, + { + "epoch": 0.98, + "learning_rate": 4.6469747685321123e-07, + "loss": 0.7838, + "step": 49900 + }, + { + "epoch": 0.98, + "learning_rate": 4.451230254272124e-07, + "loss": 0.826, + "step": 49950 + }, + { + "epoch": 0.98, + "learning_rate": 4.2554857400121365e-07, + "loss": 0.9047, + "step": 50000 + }, + { + "epoch": 0.98, + "learning_rate": 4.059741225752149e-07, + "loss": 0.8526, + "step": 50050 + }, + { + "epoch": 0.98, + "learning_rate": 3.8639967114921607e-07, + "loss": 0.8042, + "step": 50100 + }, + { + "epoch": 0.98, + "learning_rate": 3.668252197232173e-07, + "loss": 0.9519, + "step": 50150 + }, + { + "epoch": 0.98, + "learning_rate": 3.472507682972185e-07, + "loss": 0.8923, + "step": 50200 + }, + { + "epoch": 0.98, + "learning_rate": 3.276763168712197e-07, + "loss": 0.7782, + "step": 50250 + }, + { + "epoch": 0.98, + "learning_rate": 3.0810186544522095e-07, + "loss": 0.8607, + "step": 50300 + }, + { + "epoch": 0.99, + "learning_rate": 2.8852741401922214e-07, + "loss": 0.8376, + "step": 50350 + }, + { + "epoch": 0.99, + "learning_rate": 2.6895296259322337e-07, + "loss": 0.842, + "step": 50400 + }, + { + "epoch": 0.99, + "learning_rate": 2.4937851116722455e-07, + "loss": 0.878, + "step": 50450 + }, + { + "epoch": 0.99, + "learning_rate": 2.298040597412258e-07, + "loss": 0.8581, + "step": 50500 + }, + { + "epoch": 0.99, + "learning_rate": 2.10229608315227e-07, + "loss": 0.8958, + "step": 50550 + }, + { + "epoch": 0.99, + "learning_rate": 1.9065515688922818e-07, + "loss": 0.92, + "step": 50600 + }, + { + "epoch": 0.99, + "learning_rate": 1.7108070546322939e-07, + "loss": 0.9209, + "step": 50650 + }, + { + "epoch": 0.99, + "learning_rate": 1.5150625403723062e-07, + "loss": 0.9095, + "step": 50700 + }, + { + "epoch": 0.99, + "learning_rate": 1.3193180261123183e-07, + "loss": 0.8469, + "step": 50750 + }, + { + "epoch": 0.99, + "learning_rate": 1.1235735118523305e-07, + "loss": 0.8987, + "step": 50800 + }, + { + "epoch": 1.0, + "learning_rate": 9.278289975923425e-08, + "loss": 0.8769, + "step": 50850 + }, + { + "epoch": 1.0, + "learning_rate": 7.320844833323547e-08, + "loss": 0.8072, + "step": 50900 + }, + { + "epoch": 1.0, + "learning_rate": 5.3633996907236675e-08, + "loss": 0.7988, + "step": 50950 + }, + { + "epoch": 1.0, + "learning_rate": 3.405954548123789e-08, + "loss": 0.7943, + "step": 51000 + }, + { + "epoch": 1.0, + "learning_rate": 1.4485094055239104e-08, + "loss": 0.7839, + "step": 51050 + }, + { + "epoch": 1.0, + "eval_Macro F1": 0.45208930263938163, + "eval_Macro Precision": 0.5507686024916876, + "eval_Macro Recall": 0.4294052669214516, + "eval_Micro F1": 0.7266960098561323, + "eval_Micro Precision": 0.7266960098561323, + "eval_Micro Recall": 0.7266960098561323, + "eval_Weighted F1": 0.6482219016896026, + "eval_Weighted Precision": 0.6261988330407515, + "eval_Weighted Recall": 0.7266960098561323, + "eval_accuracy": 0.7266960098561323, + "eval_loss": 0.8367530107498169, + "eval_runtime": 1309.1607, + "eval_samples_per_second": 153.76, + "eval_steps_per_second": 19.22, + "step": 51087 + }, + { + "epoch": 1.0, + "step": 51087, + "total_flos": 1927062258615528.0, + "train_loss": 0.8681267702094373, + "train_runtime": 9176.1966, + "train_samples_per_second": 44.538, + "train_steps_per_second": 5.567 + } + ], + "max_steps": 51087, + "num_train_epochs": 1, + "total_flos": 1927062258615528.0, + "trial_name": null, + "trial_params": null +}