{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 51465, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.990284659477315e-05, "loss": 0.0, "step": 100 }, { "epoch": 0.0, "learning_rate": 4.98056931895463e-05, "loss": 0.0, "step": 200 }, { "epoch": 0.01, "learning_rate": 4.970853978431944e-05, "loss": 0.0, "step": 300 }, { "epoch": 0.01, "learning_rate": 4.961138637909259e-05, "loss": 0.0, "step": 400 }, { "epoch": 0.01, "learning_rate": 4.951423297386574e-05, "loss": 0.0, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.941707956863889e-05, "loss": 0.0, "step": 600 }, { "epoch": 0.01, "learning_rate": 4.931992616341203e-05, "loss": 0.0, "step": 700 }, { "epoch": 0.02, "learning_rate": 4.922277275818518e-05, "loss": 0.0, "step": 800 }, { "epoch": 0.02, "learning_rate": 4.9125619352958326e-05, "loss": 0.0, "step": 900 }, { "epoch": 0.02, "learning_rate": 4.9028465947731474e-05, "loss": 0.0, "step": 1000 }, { "epoch": 0.02, "learning_rate": 4.8931312542504616e-05, "loss": 0.0, "step": 1100 }, { "epoch": 0.02, "learning_rate": 4.8834159137277764e-05, "loss": 0.0, "step": 1200 }, { "epoch": 0.03, "learning_rate": 4.873700573205091e-05, "loss": 0.0, "step": 1300 }, { "epoch": 0.03, "learning_rate": 4.863985232682406e-05, "loss": 0.0, "step": 1400 }, { "epoch": 0.03, "learning_rate": 4.85426989215972e-05, "loss": 0.0, "step": 1500 }, { "epoch": 0.03, "learning_rate": 4.844554551637035e-05, "loss": 0.0, "step": 1600 }, { "epoch": 0.03, "learning_rate": 4.83483921111435e-05, "loss": 0.0, "step": 1700 }, { "epoch": 0.03, "learning_rate": 4.825123870591665e-05, "loss": 0.0, "step": 1800 }, { "epoch": 0.04, "learning_rate": 4.81540853006898e-05, "loss": 0.0, "step": 1900 }, { "epoch": 0.04, "learning_rate": 4.805693189546294e-05, "loss": 0.0, "step": 2000 }, { "epoch": 0.04, "learning_rate": 4.795977849023609e-05, "loss": 0.0, "step": 2100 }, { "epoch": 0.04, "learning_rate": 4.7862625085009236e-05, "loss": 0.0, "step": 2200 }, { "epoch": 0.04, "learning_rate": 4.776547167978238e-05, "loss": 0.0, "step": 2300 }, { "epoch": 0.05, "learning_rate": 4.7668318274555526e-05, "loss": 0.0, "step": 2400 }, { "epoch": 0.05, "learning_rate": 4.7571164869328675e-05, "loss": 0.0, "step": 2500 }, { "epoch": 0.05, "learning_rate": 4.747401146410182e-05, "loss": 0.0, "step": 2600 }, { "epoch": 0.05, "learning_rate": 4.7376858058874965e-05, "loss": 0.0, "step": 2700 }, { "epoch": 0.05, "learning_rate": 4.727970465364811e-05, "loss": 0.0, "step": 2800 }, { "epoch": 0.06, "learning_rate": 4.7182551248421255e-05, "loss": 0.0, "step": 2900 }, { "epoch": 0.06, "learning_rate": 4.7085397843194404e-05, "loss": 0.0, "step": 3000 }, { "epoch": 0.06, "learning_rate": 4.698824443796755e-05, "loss": 0.0, "step": 3100 }, { "epoch": 0.06, "learning_rate": 4.6891091032740694e-05, "loss": 0.0, "step": 3200 }, { "epoch": 0.06, "learning_rate": 4.679393762751384e-05, "loss": 0.0, "step": 3300 }, { "epoch": 0.07, "learning_rate": 4.669678422228699e-05, "loss": 0.0, "step": 3400 }, { "epoch": 0.07, "learning_rate": 4.659963081706014e-05, "loss": 0.0, "step": 3500 }, { "epoch": 0.07, "learning_rate": 4.650247741183329e-05, "loss": 0.0, "step": 3600 }, { "epoch": 0.07, "learning_rate": 4.640532400660643e-05, "loss": 0.0, "step": 3700 }, { "epoch": 0.07, "learning_rate": 4.630817060137958e-05, "loss": 0.0, "step": 3800 }, { "epoch": 0.08, "learning_rate": 4.621101719615273e-05, "loss": 0.0, "step": 3900 }, { "epoch": 0.08, "learning_rate": 4.6113863790925876e-05, "loss": 0.0, "step": 4000 }, { "epoch": 0.08, "learning_rate": 4.601671038569902e-05, "loss": 0.0, "step": 4100 }, { "epoch": 0.08, "learning_rate": 4.5919556980472166e-05, "loss": 0.0, "step": 4200 }, { "epoch": 0.08, "learning_rate": 4.5822403575245314e-05, "loss": 0.0, "step": 4300 }, { "epoch": 0.09, "learning_rate": 4.572525017001846e-05, "loss": 0.0, "step": 4400 }, { "epoch": 0.09, "learning_rate": 4.5628096764791605e-05, "loss": 0.0, "step": 4500 }, { "epoch": 0.09, "learning_rate": 4.553094335956475e-05, "loss": 0.0, "step": 4600 }, { "epoch": 0.09, "learning_rate": 4.54337899543379e-05, "loss": 0.0, "step": 4700 }, { "epoch": 0.09, "learning_rate": 4.533663654911105e-05, "loss": 0.0, "step": 4800 }, { "epoch": 0.1, "learning_rate": 4.523948314388419e-05, "loss": 0.0, "step": 4900 }, { "epoch": 0.1, "learning_rate": 4.514232973865734e-05, "loss": 0.0, "step": 5000 }, { "epoch": 0.1, "learning_rate": 4.504517633343049e-05, "loss": 0.0, "step": 5100 }, { "epoch": 0.1, "learning_rate": 4.494802292820364e-05, "loss": 0.0, "step": 5200 }, { "epoch": 0.1, "learning_rate": 4.4850869522976786e-05, "loss": 0.0, "step": 5300 }, { "epoch": 0.1, "learning_rate": 4.475371611774993e-05, "loss": 0.0, "step": 5400 }, { "epoch": 0.11, "learning_rate": 4.4656562712523076e-05, "loss": 0.0, "step": 5500 }, { "epoch": 0.11, "learning_rate": 4.4559409307296225e-05, "loss": 0.0, "step": 5600 }, { "epoch": 0.11, "learning_rate": 4.4462255902069374e-05, "loss": 0.0, "step": 5700 }, { "epoch": 0.11, "learning_rate": 4.4365102496842515e-05, "loss": 0.0, "step": 5800 }, { "epoch": 0.11, "learning_rate": 4.4267949091615664e-05, "loss": 0.0, "step": 5900 }, { "epoch": 0.12, "learning_rate": 4.417079568638881e-05, "loss": 0.0, "step": 6000 }, { "epoch": 0.12, "learning_rate": 4.407364228116196e-05, "loss": 0.0, "step": 6100 }, { "epoch": 0.12, "learning_rate": 4.39764888759351e-05, "loss": 0.0, "step": 6200 }, { "epoch": 0.12, "learning_rate": 4.387933547070825e-05, "loss": 0.0, "step": 6300 }, { "epoch": 0.12, "learning_rate": 4.37821820654814e-05, "loss": 0.0, "step": 6400 }, { "epoch": 0.13, "learning_rate": 4.368502866025455e-05, "loss": 0.0, "step": 6500 }, { "epoch": 0.13, "learning_rate": 4.358787525502769e-05, "loss": 0.0, "step": 6600 }, { "epoch": 0.13, "learning_rate": 4.349072184980084e-05, "loss": 0.0, "step": 6700 }, { "epoch": 0.13, "learning_rate": 4.339356844457399e-05, "loss": 0.0, "step": 6800 }, { "epoch": 0.13, "learning_rate": 4.3296415039347136e-05, "loss": 0.0, "step": 6900 }, { "epoch": 0.14, "learning_rate": 4.319926163412028e-05, "loss": 0.0, "step": 7000 }, { "epoch": 0.14, "learning_rate": 4.3102108228893426e-05, "loss": 0.0, "step": 7100 }, { "epoch": 0.14, "learning_rate": 4.3004954823666574e-05, "loss": 0.0, "step": 7200 }, { "epoch": 0.14, "learning_rate": 4.2907801418439716e-05, "loss": 0.0, "step": 7300 }, { "epoch": 0.14, "learning_rate": 4.2810648013212865e-05, "loss": 0.0, "step": 7400 }, { "epoch": 0.15, "learning_rate": 4.271349460798601e-05, "loss": 0.0, "step": 7500 }, { "epoch": 0.15, "learning_rate": 4.2616341202759155e-05, "loss": 0.0, "step": 7600 }, { "epoch": 0.15, "learning_rate": 4.2519187797532303e-05, "loss": 0.0, "step": 7700 }, { "epoch": 0.15, "learning_rate": 4.242203439230545e-05, "loss": 0.0, "step": 7800 }, { "epoch": 0.15, "learning_rate": 4.2324880987078594e-05, "loss": 0.0, "step": 7900 }, { "epoch": 0.16, "learning_rate": 4.222772758185174e-05, "loss": 0.0, "step": 8000 }, { "epoch": 0.16, "learning_rate": 4.213057417662489e-05, "loss": 0.0, "step": 8100 }, { "epoch": 0.16, "learning_rate": 4.203342077139804e-05, "loss": 0.0, "step": 8200 }, { "epoch": 0.16, "learning_rate": 4.193626736617118e-05, "loss": 0.0, "step": 8300 }, { "epoch": 0.16, "learning_rate": 4.183911396094433e-05, "loss": 0.0, "step": 8400 }, { "epoch": 0.17, "learning_rate": 4.174196055571748e-05, "loss": 0.0, "step": 8500 }, { "epoch": 0.17, "learning_rate": 4.1644807150490627e-05, "loss": 0.0, "step": 8600 }, { "epoch": 0.17, "learning_rate": 4.1547653745263775e-05, "loss": 0.0, "step": 8700 }, { "epoch": 0.17, "learning_rate": 4.145050034003692e-05, "loss": 0.0, "step": 8800 }, { "epoch": 0.17, "learning_rate": 4.1353346934810065e-05, "loss": 0.0, "step": 8900 }, { "epoch": 0.17, "learning_rate": 4.1256193529583214e-05, "loss": 0.0, "step": 9000 }, { "epoch": 0.18, "learning_rate": 4.115904012435636e-05, "loss": 0.0, "step": 9100 }, { "epoch": 0.18, "learning_rate": 4.1061886719129504e-05, "loss": 0.0, "step": 9200 }, { "epoch": 0.18, "learning_rate": 4.096473331390265e-05, "loss": 0.0, "step": 9300 }, { "epoch": 0.18, "learning_rate": 4.08675799086758e-05, "loss": 0.0, "step": 9400 }, { "epoch": 0.18, "learning_rate": 4.077042650344895e-05, "loss": 0.0, "step": 9500 }, { "epoch": 0.19, "learning_rate": 4.067327309822209e-05, "loss": 0.0, "step": 9600 }, { "epoch": 0.19, "learning_rate": 4.057611969299524e-05, "loss": 0.0, "step": 9700 }, { "epoch": 0.19, "learning_rate": 4.047896628776839e-05, "loss": 0.0, "step": 9800 }, { "epoch": 0.19, "learning_rate": 4.038181288254154e-05, "loss": 0.0, "step": 9900 }, { "epoch": 0.19, "learning_rate": 4.028465947731468e-05, "loss": 0.0, "step": 10000 }, { "epoch": 0.2, "learning_rate": 4.018750607208783e-05, "loss": 0.0, "step": 10100 }, { "epoch": 0.2, "learning_rate": 4.0090352666860976e-05, "loss": 0.0, "step": 10200 }, { "epoch": 0.2, "learning_rate": 3.9993199261634124e-05, "loss": 0.0, "step": 10300 }, { "epoch": 0.2, "learning_rate": 3.989604585640727e-05, "loss": 0.0, "step": 10400 }, { "epoch": 0.2, "learning_rate": 3.9798892451180415e-05, "loss": 0.0, "step": 10500 }, { "epoch": 0.21, "learning_rate": 3.970173904595356e-05, "loss": 0.0, "step": 10600 }, { "epoch": 0.21, "learning_rate": 3.960458564072671e-05, "loss": 0.0, "step": 10700 }, { "epoch": 0.21, "learning_rate": 3.950743223549986e-05, "loss": 0.0, "step": 10800 }, { "epoch": 0.21, "learning_rate": 3.9410278830273e-05, "loss": 0.0, "step": 10900 }, { "epoch": 0.21, "learning_rate": 3.931312542504615e-05, "loss": 0.0, "step": 11000 }, { "epoch": 0.22, "learning_rate": 3.92159720198193e-05, "loss": 0.0, "step": 11100 }, { "epoch": 0.22, "learning_rate": 3.911881861459245e-05, "loss": 0.0, "step": 11200 }, { "epoch": 0.22, "learning_rate": 3.902166520936559e-05, "loss": 0.0, "step": 11300 }, { "epoch": 0.22, "learning_rate": 3.892451180413874e-05, "loss": 0.0, "step": 11400 }, { "epoch": 0.22, "learning_rate": 3.8827358398911886e-05, "loss": 0.0, "step": 11500 }, { "epoch": 0.23, "learning_rate": 3.8730204993685035e-05, "loss": 0.0, "step": 11600 }, { "epoch": 0.23, "learning_rate": 3.8633051588458183e-05, "loss": 0.0, "step": 11700 }, { "epoch": 0.23, "learning_rate": 3.8535898183231325e-05, "loss": 0.0, "step": 11800 }, { "epoch": 0.23, "learning_rate": 3.8438744778004474e-05, "loss": 0.0, "step": 11900 }, { "epoch": 0.23, "learning_rate": 3.8341591372777615e-05, "loss": 0.0, "step": 12000 }, { "epoch": 0.24, "learning_rate": 3.8244437967550764e-05, "loss": 0.0, "step": 12100 }, { "epoch": 0.24, "learning_rate": 3.814728456232391e-05, "loss": 0.0, "step": 12200 }, { "epoch": 0.24, "learning_rate": 3.8050131157097054e-05, "loss": 0.0, "step": 12300 }, { "epoch": 0.24, "learning_rate": 3.79529777518702e-05, "loss": 0.0, "step": 12400 }, { "epoch": 0.24, "learning_rate": 3.785582434664335e-05, "loss": 0.0, "step": 12500 }, { "epoch": 0.24, "learning_rate": 3.775867094141649e-05, "loss": 0.0, "step": 12600 }, { "epoch": 0.25, "learning_rate": 3.766151753618964e-05, "loss": 0.0, "step": 12700 }, { "epoch": 0.25, "learning_rate": 3.756436413096279e-05, "loss": 0.0, "step": 12800 }, { "epoch": 0.25, "learning_rate": 3.746721072573594e-05, "loss": 0.0, "step": 12900 }, { "epoch": 0.25, "learning_rate": 3.737005732050908e-05, "loss": 0.0, "step": 13000 }, { "epoch": 0.25, "learning_rate": 3.727290391528223e-05, "loss": 0.0, "step": 13100 }, { "epoch": 0.26, "learning_rate": 3.717575051005538e-05, "loss": 0.0, "step": 13200 }, { "epoch": 0.26, "learning_rate": 3.7078597104828526e-05, "loss": 0.0, "step": 13300 }, { "epoch": 0.26, "learning_rate": 3.6981443699601675e-05, "loss": 0.0, "step": 13400 }, { "epoch": 0.26, "learning_rate": 3.6884290294374816e-05, "loss": 0.0, "step": 13500 }, { "epoch": 0.26, "learning_rate": 3.6787136889147965e-05, "loss": 0.0, "step": 13600 }, { "epoch": 0.27, "learning_rate": 3.668998348392111e-05, "loss": 0.0, "step": 13700 }, { "epoch": 0.27, "learning_rate": 3.659283007869426e-05, "loss": 0.0, "step": 13800 }, { "epoch": 0.27, "learning_rate": 3.6495676673467404e-05, "loss": 0.0, "step": 13900 }, { "epoch": 0.27, "learning_rate": 3.639852326824055e-05, "loss": 0.0, "step": 14000 }, { "epoch": 0.27, "learning_rate": 3.63013698630137e-05, "loss": 0.0, "step": 14100 }, { "epoch": 0.28, "learning_rate": 3.620421645778685e-05, "loss": 0.0, "step": 14200 }, { "epoch": 0.28, "learning_rate": 3.610706305255999e-05, "loss": 0.0, "step": 14300 }, { "epoch": 0.28, "learning_rate": 3.600990964733314e-05, "loss": 0.0, "step": 14400 }, { "epoch": 0.28, "learning_rate": 3.591275624210629e-05, "loss": 0.0, "step": 14500 }, { "epoch": 0.28, "learning_rate": 3.5815602836879437e-05, "loss": 0.0, "step": 14600 }, { "epoch": 0.29, "learning_rate": 3.571844943165258e-05, "loss": 0.0, "step": 14700 }, { "epoch": 0.29, "learning_rate": 3.562129602642573e-05, "loss": 0.0, "step": 14800 }, { "epoch": 0.29, "learning_rate": 3.5524142621198875e-05, "loss": 0.0, "step": 14900 }, { "epoch": 0.29, "learning_rate": 3.5426989215972024e-05, "loss": 0.0, "step": 15000 }, { "epoch": 0.29, "learning_rate": 3.532983581074517e-05, "loss": 0.0, "step": 15100 }, { "epoch": 0.3, "learning_rate": 3.5232682405518314e-05, "loss": 0.0, "step": 15200 }, { "epoch": 0.3, "learning_rate": 3.513552900029146e-05, "loss": 0.0, "step": 15300 }, { "epoch": 0.3, "learning_rate": 3.503837559506461e-05, "loss": 0.0, "step": 15400 }, { "epoch": 0.3, "learning_rate": 3.494122218983776e-05, "loss": 0.0, "step": 15500 }, { "epoch": 0.3, "learning_rate": 3.48440687846109e-05, "loss": 0.0, "step": 15600 }, { "epoch": 0.31, "learning_rate": 3.474691537938405e-05, "loss": 0.0, "step": 15700 }, { "epoch": 0.31, "learning_rate": 3.46497619741572e-05, "loss": 0.0, "step": 15800 }, { "epoch": 0.31, "learning_rate": 3.455260856893035e-05, "loss": 0.0, "step": 15900 }, { "epoch": 0.31, "learning_rate": 3.445545516370349e-05, "loss": 0.0, "step": 16000 }, { "epoch": 0.31, "learning_rate": 3.435830175847664e-05, "loss": 0.0, "step": 16100 }, { "epoch": 0.31, "learning_rate": 3.4261148353249786e-05, "loss": 0.0, "step": 16200 }, { "epoch": 0.32, "learning_rate": 3.4163994948022934e-05, "loss": 0.0, "step": 16300 }, { "epoch": 0.32, "learning_rate": 3.4066841542796076e-05, "loss": 0.0, "step": 16400 }, { "epoch": 0.32, "learning_rate": 3.3969688137569225e-05, "loss": 0.0, "step": 16500 }, { "epoch": 0.32, "learning_rate": 3.387253473234237e-05, "loss": 0.0, "step": 16600 }, { "epoch": 0.32, "learning_rate": 3.377538132711552e-05, "loss": 0.0, "step": 16700 }, { "epoch": 0.33, "learning_rate": 3.3678227921888663e-05, "loss": 0.0, "step": 16800 }, { "epoch": 0.33, "learning_rate": 3.358107451666181e-05, "loss": 0.0, "step": 16900 }, { "epoch": 0.33, "learning_rate": 3.348392111143496e-05, "loss": 0.0, "step": 17000 }, { "epoch": 0.33, "learning_rate": 3.33867677062081e-05, "loss": 0.0, "step": 17100 }, { "epoch": 0.33, "learning_rate": 3.328961430098125e-05, "loss": 0.0, "step": 17200 }, { "epoch": 0.34, "learning_rate": 3.319246089575439e-05, "loss": 0.0, "step": 17300 }, { "epoch": 0.34, "learning_rate": 3.309530749052754e-05, "loss": 0.0, "step": 17400 }, { "epoch": 0.34, "learning_rate": 3.299815408530069e-05, "loss": 0.0, "step": 17500 }, { "epoch": 0.34, "learning_rate": 3.290100068007384e-05, "loss": 0.0, "step": 17600 }, { "epoch": 0.34, "learning_rate": 3.280384727484698e-05, "loss": 0.0, "step": 17700 }, { "epoch": 0.35, "learning_rate": 3.270669386962013e-05, "loss": 0.0, "step": 17800 }, { "epoch": 0.35, "learning_rate": 3.260954046439328e-05, "loss": 0.0, "step": 17900 }, { "epoch": 0.35, "learning_rate": 3.2512387059166425e-05, "loss": 0.0, "step": 18000 }, { "epoch": 0.35, "learning_rate": 3.241523365393957e-05, "loss": 0.0, "step": 18100 }, { "epoch": 0.35, "learning_rate": 3.2318080248712716e-05, "loss": 0.0, "step": 18200 }, { "epoch": 0.36, "learning_rate": 3.2220926843485864e-05, "loss": 0.0, "step": 18300 }, { "epoch": 0.36, "learning_rate": 3.212377343825901e-05, "loss": 0.0, "step": 18400 }, { "epoch": 0.36, "learning_rate": 3.202662003303216e-05, "loss": 0.0, "step": 18500 }, { "epoch": 0.36, "learning_rate": 3.19294666278053e-05, "loss": 0.0, "step": 18600 }, { "epoch": 0.36, "learning_rate": 3.183231322257845e-05, "loss": 0.0, "step": 18700 }, { "epoch": 0.37, "learning_rate": 3.17351598173516e-05, "loss": 0.0, "step": 18800 }, { "epoch": 0.37, "learning_rate": 3.163800641212475e-05, "loss": 0.0, "step": 18900 }, { "epoch": 0.37, "learning_rate": 3.154085300689789e-05, "loss": 0.0, "step": 19000 }, { "epoch": 0.37, "learning_rate": 3.144369960167104e-05, "loss": 0.0, "step": 19100 }, { "epoch": 0.37, "learning_rate": 3.134654619644419e-05, "loss": 0.0, "step": 19200 }, { "epoch": 0.38, "learning_rate": 3.1249392791217336e-05, "loss": 0.0, "step": 19300 }, { "epoch": 0.38, "learning_rate": 3.115223938599048e-05, "loss": 0.0, "step": 19400 }, { "epoch": 0.38, "learning_rate": 3.1055085980763626e-05, "loss": 0.0, "step": 19500 }, { "epoch": 0.38, "learning_rate": 3.0957932575536775e-05, "loss": 0.0, "step": 19600 }, { "epoch": 0.38, "learning_rate": 3.086077917030992e-05, "loss": 0.0, "step": 19700 }, { "epoch": 0.38, "learning_rate": 3.0763625765083065e-05, "loss": 0.0, "step": 19800 }, { "epoch": 0.39, "learning_rate": 3.0666472359856214e-05, "loss": 0.0, "step": 19900 }, { "epoch": 0.39, "learning_rate": 3.056931895462936e-05, "loss": 0.0, "step": 20000 }, { "epoch": 0.39, "learning_rate": 3.047216554940251e-05, "loss": 0.0, "step": 20100 }, { "epoch": 0.39, "learning_rate": 3.037501214417566e-05, "loss": 0.0, "step": 20200 }, { "epoch": 0.39, "learning_rate": 3.02778587389488e-05, "loss": 0.0, "step": 20300 }, { "epoch": 0.4, "learning_rate": 3.018070533372195e-05, "loss": 0.0, "step": 20400 }, { "epoch": 0.4, "learning_rate": 3.0083551928495095e-05, "loss": 0.0, "step": 20500 }, { "epoch": 0.4, "learning_rate": 2.9986398523268243e-05, "loss": 0.0, "step": 20600 }, { "epoch": 0.4, "learning_rate": 2.9889245118041388e-05, "loss": 0.0, "step": 20700 }, { "epoch": 0.4, "learning_rate": 2.9792091712814533e-05, "loss": 0.0, "step": 20800 }, { "epoch": 0.41, "learning_rate": 2.9694938307587682e-05, "loss": 0.0, "step": 20900 }, { "epoch": 0.41, "learning_rate": 2.959778490236083e-05, "loss": 0.0, "step": 21000 }, { "epoch": 0.41, "learning_rate": 2.9500631497133972e-05, "loss": 0.0, "step": 21100 }, { "epoch": 0.41, "learning_rate": 2.940347809190712e-05, "loss": 0.0, "step": 21200 }, { "epoch": 0.41, "learning_rate": 2.930632468668027e-05, "loss": 0.0, "step": 21300 }, { "epoch": 0.42, "learning_rate": 2.9209171281453418e-05, "loss": 0.0, "step": 21400 }, { "epoch": 0.42, "learning_rate": 2.911201787622656e-05, "loss": 0.0, "step": 21500 }, { "epoch": 0.42, "learning_rate": 2.9014864470999708e-05, "loss": 0.0, "step": 21600 }, { "epoch": 0.42, "learning_rate": 2.8917711065772857e-05, "loss": 0.0, "step": 21700 }, { "epoch": 0.42, "learning_rate": 2.8820557660546005e-05, "loss": 0.0, "step": 21800 }, { "epoch": 0.43, "learning_rate": 2.8723404255319154e-05, "loss": 0.0, "step": 21900 }, { "epoch": 0.43, "learning_rate": 2.8626250850092295e-05, "loss": 0.0, "step": 22000 }, { "epoch": 0.43, "learning_rate": 2.8529097444865444e-05, "loss": 0.0, "step": 22100 }, { "epoch": 0.43, "learning_rate": 2.8431944039638592e-05, "loss": 0.0, "step": 22200 }, { "epoch": 0.43, "learning_rate": 2.833479063441174e-05, "loss": 0.0, "step": 22300 }, { "epoch": 0.44, "learning_rate": 2.8237637229184883e-05, "loss": 0.0, "step": 22400 }, { "epoch": 0.44, "learning_rate": 2.814048382395803e-05, "loss": 0.0, "step": 22500 }, { "epoch": 0.44, "learning_rate": 2.804333041873118e-05, "loss": 0.0, "step": 22600 }, { "epoch": 0.44, "learning_rate": 2.7946177013504328e-05, "loss": 0.0, "step": 22700 }, { "epoch": 0.44, "learning_rate": 2.784902360827747e-05, "loss": 0.0, "step": 22800 }, { "epoch": 0.44, "learning_rate": 2.775187020305062e-05, "loss": 0.0, "step": 22900 }, { "epoch": 0.45, "learning_rate": 2.7654716797823767e-05, "loss": 0.0, "step": 23000 }, { "epoch": 0.45, "learning_rate": 2.7557563392596912e-05, "loss": 0.0, "step": 23100 }, { "epoch": 0.45, "learning_rate": 2.7460409987370057e-05, "loss": 0.0, "step": 23200 }, { "epoch": 0.45, "learning_rate": 2.7363256582143202e-05, "loss": 0.0, "step": 23300 }, { "epoch": 0.45, "learning_rate": 2.726610317691635e-05, "loss": 0.0, "step": 23400 }, { "epoch": 0.46, "learning_rate": 2.71689497716895e-05, "loss": 0.0, "step": 23500 }, { "epoch": 0.46, "learning_rate": 2.7071796366462648e-05, "loss": 0.0, "step": 23600 }, { "epoch": 0.46, "learning_rate": 2.697464296123579e-05, "loss": 0.0, "step": 23700 }, { "epoch": 0.46, "learning_rate": 2.6877489556008938e-05, "loss": 0.0, "step": 23800 }, { "epoch": 0.46, "learning_rate": 2.6780336150782087e-05, "loss": 0.0, "step": 23900 }, { "epoch": 0.47, "learning_rate": 2.6683182745555235e-05, "loss": 0.0, "step": 24000 }, { "epoch": 0.47, "learning_rate": 2.6586029340328377e-05, "loss": 0.0, "step": 24100 }, { "epoch": 0.47, "learning_rate": 2.6488875935101526e-05, "loss": 0.0, "step": 24200 }, { "epoch": 0.47, "learning_rate": 2.6391722529874674e-05, "loss": 0.0, "step": 24300 }, { "epoch": 0.47, "learning_rate": 2.6294569124647823e-05, "loss": 0.0, "step": 24400 }, { "epoch": 0.48, "learning_rate": 2.6197415719420964e-05, "loss": 0.0, "step": 24500 }, { "epoch": 0.48, "learning_rate": 2.6100262314194113e-05, "loss": 0.0, "step": 24600 }, { "epoch": 0.48, "learning_rate": 2.600310890896726e-05, "loss": 0.0, "step": 24700 }, { "epoch": 0.48, "learning_rate": 2.590595550374041e-05, "loss": 0.0, "step": 24800 }, { "epoch": 0.48, "learning_rate": 2.5808802098513552e-05, "loss": 0.0, "step": 24900 }, { "epoch": 0.49, "learning_rate": 2.57116486932867e-05, "loss": 0.0, "step": 25000 }, { "epoch": 0.49, "learning_rate": 2.561449528805985e-05, "loss": 0.0, "step": 25100 }, { "epoch": 0.49, "learning_rate": 2.5517341882832997e-05, "loss": 0.0, "step": 25200 }, { "epoch": 0.49, "learning_rate": 2.5420188477606142e-05, "loss": 0.0, "step": 25300 }, { "epoch": 0.49, "learning_rate": 2.5323035072379288e-05, "loss": 0.0, "step": 25400 }, { "epoch": 0.5, "learning_rate": 2.5225881667152436e-05, "loss": 0.0, "step": 25500 }, { "epoch": 0.5, "learning_rate": 2.512872826192558e-05, "loss": 0.0, "step": 25600 }, { "epoch": 0.5, "learning_rate": 2.503157485669873e-05, "loss": 0.0, "step": 25700 }, { "epoch": 0.5, "learning_rate": 2.4934421451471875e-05, "loss": 0.0, "step": 25800 }, { "epoch": 0.5, "learning_rate": 2.483726804624502e-05, "loss": 0.0, "step": 25900 }, { "epoch": 0.51, "learning_rate": 2.474011464101817e-05, "loss": 0.0, "step": 26000 }, { "epoch": 0.51, "learning_rate": 2.4642961235791314e-05, "loss": 0.0, "step": 26100 }, { "epoch": 0.51, "learning_rate": 2.4545807830564462e-05, "loss": 0.0, "step": 26200 }, { "epoch": 0.51, "learning_rate": 2.4448654425337607e-05, "loss": 0.0, "step": 26300 }, { "epoch": 0.51, "learning_rate": 2.4351501020110756e-05, "loss": 0.0, "step": 26400 }, { "epoch": 0.51, "learning_rate": 2.42543476148839e-05, "loss": 0.0, "step": 26500 }, { "epoch": 0.52, "learning_rate": 2.415719420965705e-05, "loss": 0.0, "step": 26600 }, { "epoch": 0.52, "learning_rate": 2.4060040804430198e-05, "loss": 0.0, "step": 26700 }, { "epoch": 0.52, "learning_rate": 2.3962887399203343e-05, "loss": 0.0, "step": 26800 }, { "epoch": 0.52, "learning_rate": 2.3865733993976492e-05, "loss": 0.0, "step": 26900 }, { "epoch": 0.52, "learning_rate": 2.3768580588749637e-05, "loss": 0.0, "step": 27000 }, { "epoch": 0.53, "learning_rate": 2.3671427183522785e-05, "loss": 0.0, "step": 27100 }, { "epoch": 0.53, "learning_rate": 2.357427377829593e-05, "loss": 0.0, "step": 27200 }, { "epoch": 0.53, "learning_rate": 2.347712037306908e-05, "loss": 0.0, "step": 27300 }, { "epoch": 0.53, "learning_rate": 2.3379966967842224e-05, "loss": 0.0, "step": 27400 }, { "epoch": 0.53, "learning_rate": 2.3282813562615373e-05, "loss": 0.0, "step": 27500 }, { "epoch": 0.54, "learning_rate": 2.3185660157388518e-05, "loss": 0.0, "step": 27600 }, { "epoch": 0.54, "learning_rate": 2.3088506752161666e-05, "loss": 0.0, "step": 27700 }, { "epoch": 0.54, "learning_rate": 2.299135334693481e-05, "loss": 0.0, "step": 27800 }, { "epoch": 0.54, "learning_rate": 2.2894199941707957e-05, "loss": 0.0, "step": 27900 }, { "epoch": 0.54, "learning_rate": 2.2797046536481105e-05, "loss": 0.0, "step": 28000 }, { "epoch": 0.55, "learning_rate": 2.269989313125425e-05, "loss": 0.0, "step": 28100 }, { "epoch": 0.55, "learning_rate": 2.2602739726027396e-05, "loss": 0.0, "step": 28200 }, { "epoch": 0.55, "learning_rate": 2.2505586320800544e-05, "loss": 0.0, "step": 28300 }, { "epoch": 0.55, "learning_rate": 2.2408432915573693e-05, "loss": 0.0, "step": 28400 }, { "epoch": 0.55, "learning_rate": 2.2311279510346838e-05, "loss": 0.0, "step": 28500 }, { "epoch": 0.56, "learning_rate": 2.2214126105119986e-05, "loss": 0.0, "step": 28600 }, { "epoch": 0.56, "learning_rate": 2.211697269989313e-05, "loss": 0.0, "step": 28700 }, { "epoch": 0.56, "learning_rate": 2.201981929466628e-05, "loss": 0.0, "step": 28800 }, { "epoch": 0.56, "learning_rate": 2.1922665889439425e-05, "loss": 0.0, "step": 28900 }, { "epoch": 0.56, "learning_rate": 2.1825512484212574e-05, "loss": 0.0, "step": 29000 }, { "epoch": 0.57, "learning_rate": 2.172835907898572e-05, "loss": 0.0, "step": 29100 }, { "epoch": 0.57, "learning_rate": 2.1631205673758867e-05, "loss": 0.0, "step": 29200 }, { "epoch": 0.57, "learning_rate": 2.1534052268532012e-05, "loss": 0.0, "step": 29300 }, { "epoch": 0.57, "learning_rate": 2.143689886330516e-05, "loss": 0.0, "step": 29400 }, { "epoch": 0.57, "learning_rate": 2.1339745458078306e-05, "loss": 0.0, "step": 29500 }, { "epoch": 0.58, "learning_rate": 2.1242592052851455e-05, "loss": 0.0, "step": 29600 }, { "epoch": 0.58, "learning_rate": 2.11454386476246e-05, "loss": 0.0, "step": 29700 }, { "epoch": 0.58, "learning_rate": 2.1048285242397748e-05, "loss": 0.0, "step": 29800 }, { "epoch": 0.58, "learning_rate": 2.0951131837170893e-05, "loss": 0.0, "step": 29900 }, { "epoch": 0.58, "learning_rate": 2.0853978431944042e-05, "loss": 0.0, "step": 30000 }, { "epoch": 0.58, "learning_rate": 2.075682502671719e-05, "loss": 0.0, "step": 30100 }, { "epoch": 0.59, "learning_rate": 2.0659671621490336e-05, "loss": 0.0, "step": 30200 }, { "epoch": 0.59, "learning_rate": 2.056251821626348e-05, "loss": 0.0, "step": 30300 }, { "epoch": 0.59, "learning_rate": 2.0465364811036626e-05, "loss": 0.0, "step": 30400 }, { "epoch": 0.59, "learning_rate": 2.0368211405809774e-05, "loss": 0.0, "step": 30500 }, { "epoch": 0.59, "learning_rate": 2.027105800058292e-05, "loss": 0.0, "step": 30600 }, { "epoch": 0.6, "learning_rate": 2.0173904595356068e-05, "loss": 0.0, "step": 30700 }, { "epoch": 0.6, "learning_rate": 2.0076751190129213e-05, "loss": 0.0, "step": 30800 }, { "epoch": 0.6, "learning_rate": 1.997959778490236e-05, "loss": 0.0, "step": 30900 }, { "epoch": 0.6, "learning_rate": 1.9882444379675507e-05, "loss": 0.0, "step": 31000 }, { "epoch": 0.6, "learning_rate": 1.9785290974448655e-05, "loss": 0.0, "step": 31100 }, { "epoch": 0.61, "learning_rate": 1.96881375692218e-05, "loss": 0.0, "step": 31200 }, { "epoch": 0.61, "learning_rate": 1.959098416399495e-05, "loss": 0.0, "step": 31300 }, { "epoch": 0.61, "learning_rate": 1.9493830758768094e-05, "loss": 0.0, "step": 31400 }, { "epoch": 0.61, "learning_rate": 1.9396677353541243e-05, "loss": 0.0, "step": 31500 }, { "epoch": 0.61, "learning_rate": 1.9299523948314388e-05, "loss": 0.0, "step": 31600 }, { "epoch": 0.62, "learning_rate": 1.9202370543087536e-05, "loss": 0.0, "step": 31700 }, { "epoch": 0.62, "learning_rate": 1.9105217137860685e-05, "loss": 0.0, "step": 31800 }, { "epoch": 0.62, "learning_rate": 1.900806373263383e-05, "loss": 0.0, "step": 31900 }, { "epoch": 0.62, "learning_rate": 1.891091032740698e-05, "loss": 0.0, "step": 32000 }, { "epoch": 0.62, "learning_rate": 1.8813756922180124e-05, "loss": 0.0, "step": 32100 }, { "epoch": 0.63, "learning_rate": 1.8716603516953272e-05, "loss": 0.0, "step": 32200 }, { "epoch": 0.63, "learning_rate": 1.8619450111726417e-05, "loss": 0.0, "step": 32300 }, { "epoch": 0.63, "learning_rate": 1.8522296706499566e-05, "loss": 0.0, "step": 32400 }, { "epoch": 0.63, "learning_rate": 1.842514330127271e-05, "loss": 0.0, "step": 32500 }, { "epoch": 0.63, "learning_rate": 1.832798989604586e-05, "loss": 0.0, "step": 32600 }, { "epoch": 0.64, "learning_rate": 1.8230836490819005e-05, "loss": 0.0, "step": 32700 }, { "epoch": 0.64, "learning_rate": 1.813368308559215e-05, "loss": 0.0, "step": 32800 }, { "epoch": 0.64, "learning_rate": 1.80365296803653e-05, "loss": 0.0, "step": 32900 }, { "epoch": 0.64, "learning_rate": 1.7939376275138443e-05, "loss": 0.0, "step": 33000 }, { "epoch": 0.64, "learning_rate": 1.784222286991159e-05, "loss": 0.0, "step": 33100 }, { "epoch": 0.65, "learning_rate": 1.7745069464684737e-05, "loss": 0.0, "step": 33200 }, { "epoch": 0.65, "learning_rate": 1.7647916059457882e-05, "loss": 0.0, "step": 33300 }, { "epoch": 0.65, "learning_rate": 1.755076265423103e-05, "loss": 0.0, "step": 33400 }, { "epoch": 0.65, "learning_rate": 1.745360924900418e-05, "loss": 0.0, "step": 33500 }, { "epoch": 0.65, "learning_rate": 1.7356455843777324e-05, "loss": 0.0, "step": 33600 }, { "epoch": 0.65, "learning_rate": 1.7259302438550473e-05, "loss": 0.0, "step": 33700 }, { "epoch": 0.66, "learning_rate": 1.7162149033323618e-05, "loss": 0.0, "step": 33800 }, { "epoch": 0.66, "learning_rate": 1.7064995628096767e-05, "loss": 0.0, "step": 33900 }, { "epoch": 0.66, "learning_rate": 1.6967842222869912e-05, "loss": 0.0, "step": 34000 }, { "epoch": 0.66, "learning_rate": 1.687068881764306e-05, "loss": 0.0, "step": 34100 }, { "epoch": 0.66, "learning_rate": 1.6773535412416205e-05, "loss": 0.0, "step": 34200 }, { "epoch": 0.67, "learning_rate": 1.6676382007189354e-05, "loss": 0.0, "step": 34300 }, { "epoch": 0.67, "learning_rate": 1.65792286019625e-05, "loss": 0.0, "step": 34400 }, { "epoch": 0.67, "learning_rate": 1.6482075196735648e-05, "loss": 0.0, "step": 34500 }, { "epoch": 0.67, "learning_rate": 1.6384921791508793e-05, "loss": 0.0, "step": 34600 }, { "epoch": 0.67, "learning_rate": 1.628776838628194e-05, "loss": 0.0, "step": 34700 }, { "epoch": 0.68, "learning_rate": 1.6190614981055086e-05, "loss": 0.0, "step": 34800 }, { "epoch": 0.68, "learning_rate": 1.6093461575828235e-05, "loss": 0.0, "step": 34900 }, { "epoch": 0.68, "learning_rate": 1.599630817060138e-05, "loss": 0.0, "step": 35000 }, { "epoch": 0.68, "learning_rate": 1.589915476537453e-05, "loss": 0.0, "step": 35100 }, { "epoch": 0.68, "learning_rate": 1.5802001360147674e-05, "loss": 0.0, "step": 35200 }, { "epoch": 0.69, "learning_rate": 1.570484795492082e-05, "loss": 0.0, "step": 35300 }, { "epoch": 0.69, "learning_rate": 1.5607694549693967e-05, "loss": 0.0, "step": 35400 }, { "epoch": 0.69, "learning_rate": 1.5510541144467113e-05, "loss": 0.0, "step": 35500 }, { "epoch": 0.69, "learning_rate": 1.541338773924026e-05, "loss": 0.0, "step": 35600 }, { "epoch": 0.69, "learning_rate": 1.5316234334013406e-05, "loss": 0.0, "step": 35700 }, { "epoch": 0.7, "learning_rate": 1.5219080928786555e-05, "loss": 0.0, "step": 35800 }, { "epoch": 0.7, "learning_rate": 1.5121927523559702e-05, "loss": 0.0, "step": 35900 }, { "epoch": 0.7, "learning_rate": 1.5024774118332848e-05, "loss": 0.0, "step": 36000 }, { "epoch": 0.7, "learning_rate": 1.4927620713105994e-05, "loss": 0.0, "step": 36100 }, { "epoch": 0.7, "learning_rate": 1.4830467307879142e-05, "loss": 0.0, "step": 36200 }, { "epoch": 0.71, "learning_rate": 1.4733313902652287e-05, "loss": 0.0, "step": 36300 }, { "epoch": 0.71, "learning_rate": 1.4636160497425436e-05, "loss": 0.0, "step": 36400 }, { "epoch": 0.71, "learning_rate": 1.4539007092198581e-05, "loss": 0.0, "step": 36500 }, { "epoch": 0.71, "learning_rate": 1.444185368697173e-05, "loss": 0.0, "step": 36600 }, { "epoch": 0.71, "learning_rate": 1.4344700281744875e-05, "loss": 0.0, "step": 36700 }, { "epoch": 0.72, "learning_rate": 1.4247546876518023e-05, "loss": 0.0, "step": 36800 }, { "epoch": 0.72, "learning_rate": 1.415039347129117e-05, "loss": 0.0, "step": 36900 }, { "epoch": 0.72, "learning_rate": 1.4053240066064317e-05, "loss": 0.0, "step": 37000 }, { "epoch": 0.72, "learning_rate": 1.3956086660837464e-05, "loss": 0.0, "step": 37100 }, { "epoch": 0.72, "learning_rate": 1.3858933255610609e-05, "loss": 0.0, "step": 37200 }, { "epoch": 0.72, "learning_rate": 1.3761779850383757e-05, "loss": 0.0, "step": 37300 }, { "epoch": 0.73, "learning_rate": 1.3664626445156902e-05, "loss": 0.0, "step": 37400 }, { "epoch": 0.73, "learning_rate": 1.3567473039930051e-05, "loss": 0.0, "step": 37500 }, { "epoch": 0.73, "learning_rate": 1.3470319634703196e-05, "loss": 0.0, "step": 37600 }, { "epoch": 0.73, "learning_rate": 1.3373166229476345e-05, "loss": 0.0, "step": 37700 }, { "epoch": 0.73, "learning_rate": 1.327601282424949e-05, "loss": 0.0, "step": 37800 }, { "epoch": 0.74, "learning_rate": 1.3178859419022638e-05, "loss": 0.0, "step": 37900 }, { "epoch": 0.74, "learning_rate": 1.3081706013795783e-05, "loss": 0.0, "step": 38000 }, { "epoch": 0.74, "learning_rate": 1.2984552608568932e-05, "loss": 0.0, "step": 38100 }, { "epoch": 0.74, "learning_rate": 1.2887399203342077e-05, "loss": 0.0, "step": 38200 }, { "epoch": 0.74, "learning_rate": 1.2790245798115224e-05, "loss": 0.0, "step": 38300 }, { "epoch": 0.75, "learning_rate": 1.2693092392888372e-05, "loss": 0.0, "step": 38400 }, { "epoch": 0.75, "learning_rate": 1.2595938987661518e-05, "loss": 0.0, "step": 38500 }, { "epoch": 0.75, "learning_rate": 1.2498785582434664e-05, "loss": 0.0, "step": 38600 }, { "epoch": 0.75, "learning_rate": 1.2401632177207811e-05, "loss": 0.0, "step": 38700 }, { "epoch": 0.75, "learning_rate": 1.2304478771980958e-05, "loss": 0.0, "step": 38800 }, { "epoch": 0.76, "learning_rate": 1.2207325366754105e-05, "loss": 0.0, "step": 38900 }, { "epoch": 0.76, "learning_rate": 1.2110171961527252e-05, "loss": 0.0, "step": 39000 }, { "epoch": 0.76, "learning_rate": 1.2013018556300399e-05, "loss": 0.0, "step": 39100 }, { "epoch": 0.76, "learning_rate": 1.1915865151073545e-05, "loss": 0.0, "step": 39200 }, { "epoch": 0.76, "learning_rate": 1.1818711745846694e-05, "loss": 0.0, "step": 39300 }, { "epoch": 0.77, "learning_rate": 1.1721558340619839e-05, "loss": 0.0, "step": 39400 }, { "epoch": 0.77, "learning_rate": 1.1624404935392986e-05, "loss": 0.0, "step": 39500 }, { "epoch": 0.77, "learning_rate": 1.1527251530166133e-05, "loss": 0.0, "step": 39600 }, { "epoch": 0.77, "learning_rate": 1.143009812493928e-05, "loss": 0.0, "step": 39700 }, { "epoch": 0.77, "learning_rate": 1.1332944719712426e-05, "loss": 0.0, "step": 39800 }, { "epoch": 0.78, "learning_rate": 1.1235791314485573e-05, "loss": 0.0, "step": 39900 }, { "epoch": 0.78, "learning_rate": 1.113863790925872e-05, "loss": 0.0, "step": 40000 }, { "epoch": 0.78, "learning_rate": 1.1041484504031867e-05, "loss": 0.0, "step": 40100 }, { "epoch": 0.78, "learning_rate": 1.0944331098805014e-05, "loss": 0.0, "step": 40200 }, { "epoch": 0.78, "learning_rate": 1.084717769357816e-05, "loss": 0.0, "step": 40300 }, { "epoch": 0.78, "learning_rate": 1.0750024288351307e-05, "loss": 0.0, "step": 40400 }, { "epoch": 0.79, "learning_rate": 1.0652870883124454e-05, "loss": 0.0, "step": 40500 }, { "epoch": 0.79, "learning_rate": 1.0555717477897601e-05, "loss": 0.0, "step": 40600 }, { "epoch": 0.79, "learning_rate": 1.0458564072670748e-05, "loss": 0.0, "step": 40700 }, { "epoch": 0.79, "learning_rate": 1.0361410667443893e-05, "loss": 0.0, "step": 40800 }, { "epoch": 0.79, "learning_rate": 1.0264257262217042e-05, "loss": 0.0, "step": 40900 }, { "epoch": 0.8, "learning_rate": 1.0167103856990188e-05, "loss": 0.0, "step": 41000 }, { "epoch": 0.8, "learning_rate": 1.0069950451763335e-05, "loss": 0.0, "step": 41100 }, { "epoch": 0.8, "learning_rate": 9.972797046536482e-06, "loss": 0.0, "step": 41200 }, { "epoch": 0.8, "learning_rate": 9.875643641309629e-06, "loss": 0.0, "step": 41300 }, { "epoch": 0.8, "learning_rate": 9.778490236082776e-06, "loss": 0.0, "step": 41400 }, { "epoch": 0.81, "learning_rate": 9.681336830855922e-06, "loss": 0.0, "step": 41500 }, { "epoch": 0.81, "learning_rate": 9.58418342562907e-06, "loss": 0.0, "step": 41600 }, { "epoch": 0.81, "learning_rate": 9.487030020402216e-06, "loss": 0.0, "step": 41700 }, { "epoch": 0.81, "learning_rate": 9.389876615175363e-06, "loss": 0.0, "step": 41800 }, { "epoch": 0.81, "learning_rate": 9.29272320994851e-06, "loss": 0.0, "step": 41900 }, { "epoch": 0.82, "learning_rate": 9.195569804721655e-06, "loss": 0.0, "step": 42000 }, { "epoch": 0.82, "learning_rate": 9.098416399494802e-06, "loss": 0.0, "step": 42100 }, { "epoch": 0.82, "learning_rate": 9.001262994267949e-06, "loss": 0.0, "step": 42200 }, { "epoch": 0.82, "learning_rate": 8.904109589041095e-06, "loss": 0.0, "step": 42300 }, { "epoch": 0.82, "learning_rate": 8.806956183814242e-06, "loss": 0.0, "step": 42400 }, { "epoch": 0.83, "learning_rate": 8.709802778587389e-06, "loss": 0.0, "step": 42500 }, { "epoch": 0.83, "learning_rate": 8.612649373360538e-06, "loss": 0.0, "step": 42600 }, { "epoch": 0.83, "learning_rate": 8.515495968133684e-06, "loss": 0.0, "step": 42700 }, { "epoch": 0.83, "learning_rate": 8.418342562906831e-06, "loss": 0.0, "step": 42800 }, { "epoch": 0.83, "learning_rate": 8.321189157679978e-06, "loss": 0.0, "step": 42900 }, { "epoch": 0.84, "learning_rate": 8.224035752453125e-06, "loss": 0.0, "step": 43000 }, { "epoch": 0.84, "learning_rate": 8.12688234722627e-06, "loss": 0.0, "step": 43100 }, { "epoch": 0.84, "learning_rate": 8.029728941999417e-06, "loss": 0.0, "step": 43200 }, { "epoch": 0.84, "learning_rate": 7.932575536772564e-06, "loss": 0.0, "step": 43300 }, { "epoch": 0.84, "learning_rate": 7.83542213154571e-06, "loss": 0.0, "step": 43400 }, { "epoch": 0.85, "learning_rate": 7.738268726318857e-06, "loss": 0.0, "step": 43500 }, { "epoch": 0.85, "learning_rate": 7.641115321092004e-06, "loss": 0.0, "step": 43600 }, { "epoch": 0.85, "learning_rate": 7.543961915865151e-06, "loss": 0.0, "step": 43700 }, { "epoch": 0.85, "learning_rate": 7.446808510638298e-06, "loss": 0.0, "step": 43800 }, { "epoch": 0.85, "learning_rate": 7.349655105411445e-06, "loss": 0.0, "step": 43900 }, { "epoch": 0.85, "learning_rate": 7.252501700184592e-06, "loss": 0.0, "step": 44000 }, { "epoch": 0.86, "learning_rate": 7.155348294957738e-06, "loss": 0.0, "step": 44100 }, { "epoch": 0.86, "learning_rate": 7.058194889730884e-06, "loss": 0.0, "step": 44200 }, { "epoch": 0.86, "learning_rate": 6.961041484504033e-06, "loss": 0.0, "step": 44300 }, { "epoch": 0.86, "learning_rate": 6.86388807927718e-06, "loss": 0.0, "step": 44400 }, { "epoch": 0.86, "learning_rate": 6.766734674050327e-06, "loss": 0.0, "step": 44500 }, { "epoch": 0.87, "learning_rate": 6.669581268823473e-06, "loss": 0.0, "step": 44600 }, { "epoch": 0.87, "learning_rate": 6.5724278635966194e-06, "loss": 0.0, "step": 44700 }, { "epoch": 0.87, "learning_rate": 6.475274458369766e-06, "loss": 0.0, "step": 44800 }, { "epoch": 0.87, "learning_rate": 6.378121053142913e-06, "loss": 0.0, "step": 44900 }, { "epoch": 0.87, "learning_rate": 6.28096764791606e-06, "loss": 0.0, "step": 45000 }, { "epoch": 0.88, "learning_rate": 6.183814242689207e-06, "loss": 0.0, "step": 45100 }, { "epoch": 0.88, "learning_rate": 6.086660837462354e-06, "loss": 0.0, "step": 45200 }, { "epoch": 0.88, "learning_rate": 5.9895074322354996e-06, "loss": 0.0, "step": 45300 }, { "epoch": 0.88, "learning_rate": 5.892354027008646e-06, "loss": 0.0, "step": 45400 }, { "epoch": 0.88, "learning_rate": 5.795200621781794e-06, "loss": 0.0, "step": 45500 }, { "epoch": 0.89, "learning_rate": 5.698047216554941e-06, "loss": 0.0, "step": 45600 }, { "epoch": 0.89, "learning_rate": 5.600893811328088e-06, "loss": 0.0, "step": 45700 }, { "epoch": 0.89, "learning_rate": 5.503740406101235e-06, "loss": 0.0, "step": 45800 }, { "epoch": 0.89, "learning_rate": 5.4065870008743806e-06, "loss": 0.0, "step": 45900 }, { "epoch": 0.89, "learning_rate": 5.309433595647527e-06, "loss": 0.0, "step": 46000 }, { "epoch": 0.9, "learning_rate": 5.212280190420674e-06, "loss": 0.0, "step": 46100 }, { "epoch": 0.9, "learning_rate": 5.115126785193821e-06, "loss": 0.0, "step": 46200 }, { "epoch": 0.9, "learning_rate": 5.017973379966969e-06, "loss": 0.0, "step": 46300 }, { "epoch": 0.9, "learning_rate": 4.920819974740115e-06, "loss": 0.0, "step": 46400 }, { "epoch": 0.9, "learning_rate": 4.8236665695132616e-06, "loss": 0.0, "step": 46500 }, { "epoch": 0.91, "learning_rate": 4.726513164286408e-06, "loss": 0.0, "step": 46600 }, { "epoch": 0.91, "learning_rate": 4.629359759059555e-06, "loss": 0.0, "step": 46700 }, { "epoch": 0.91, "learning_rate": 4.532206353832702e-06, "loss": 0.0, "step": 46800 }, { "epoch": 0.91, "learning_rate": 4.435052948605849e-06, "loss": 0.0, "step": 46900 }, { "epoch": 0.91, "learning_rate": 4.337899543378996e-06, "loss": 0.0, "step": 47000 }, { "epoch": 0.92, "learning_rate": 4.240746138152142e-06, "loss": 0.0, "step": 47100 }, { "epoch": 0.92, "learning_rate": 4.143592732925289e-06, "loss": 0.0, "step": 47200 }, { "epoch": 0.92, "learning_rate": 4.046439327698436e-06, "loss": 0.0, "step": 47300 }, { "epoch": 0.92, "learning_rate": 3.949285922471583e-06, "loss": 0.0, "step": 47400 }, { "epoch": 0.92, "learning_rate": 3.85213251724473e-06, "loss": 0.0, "step": 47500 }, { "epoch": 0.92, "learning_rate": 3.7549791120178763e-06, "loss": 0.0, "step": 47600 }, { "epoch": 0.93, "learning_rate": 3.657825706791023e-06, "loss": 0.0, "step": 47700 }, { "epoch": 0.93, "learning_rate": 3.56067230156417e-06, "loss": 0.0, "step": 47800 }, { "epoch": 0.93, "learning_rate": 3.4635188963373164e-06, "loss": 0.0, "step": 47900 }, { "epoch": 0.93, "learning_rate": 3.366365491110464e-06, "loss": 0.0, "step": 48000 }, { "epoch": 0.93, "learning_rate": 3.2692120858836105e-06, "loss": 0.0, "step": 48100 }, { "epoch": 0.94, "learning_rate": 3.1720586806567573e-06, "loss": 0.0, "step": 48200 }, { "epoch": 0.94, "learning_rate": 3.074905275429904e-06, "loss": 0.0, "step": 48300 }, { "epoch": 0.94, "learning_rate": 2.977751870203051e-06, "loss": 0.0, "step": 48400 }, { "epoch": 0.94, "learning_rate": 2.8805984649761974e-06, "loss": 0.0, "step": 48500 }, { "epoch": 0.94, "learning_rate": 2.7834450597493446e-06, "loss": 0.0, "step": 48600 }, { "epoch": 0.95, "learning_rate": 2.686291654522491e-06, "loss": 0.0, "step": 48700 }, { "epoch": 0.95, "learning_rate": 2.589138249295638e-06, "loss": 0.0, "step": 48800 }, { "epoch": 0.95, "learning_rate": 2.4919848440687847e-06, "loss": 0.0, "step": 48900 }, { "epoch": 0.95, "learning_rate": 2.3948314388419315e-06, "loss": 0.0, "step": 49000 }, { "epoch": 0.95, "learning_rate": 2.2976780336150784e-06, "loss": 0.0, "step": 49100 }, { "epoch": 0.96, "learning_rate": 2.200524628388225e-06, "loss": 0.0, "step": 49200 }, { "epoch": 0.96, "learning_rate": 2.103371223161372e-06, "loss": 0.0, "step": 49300 }, { "epoch": 0.96, "learning_rate": 2.0062178179345184e-06, "loss": 0.0, "step": 49400 }, { "epoch": 0.96, "learning_rate": 1.9090644127076657e-06, "loss": 0.0, "step": 49500 }, { "epoch": 0.96, "learning_rate": 1.8119110074808123e-06, "loss": 0.0, "step": 49600 }, { "epoch": 0.97, "learning_rate": 1.714757602253959e-06, "loss": 0.0, "step": 49700 }, { "epoch": 0.97, "learning_rate": 1.6176041970271058e-06, "loss": 0.0, "step": 49800 }, { "epoch": 0.97, "learning_rate": 1.5204507918002526e-06, "loss": 0.0, "step": 49900 }, { "epoch": 0.97, "learning_rate": 1.4232973865733994e-06, "loss": 0.0, "step": 50000 }, { "epoch": 0.97, "learning_rate": 1.3261439813465462e-06, "loss": 0.0, "step": 50100 }, { "epoch": 0.98, "learning_rate": 1.228990576119693e-06, "loss": 0.0, "step": 50200 }, { "epoch": 0.98, "learning_rate": 1.13183717089284e-06, "loss": 0.0, "step": 50300 }, { "epoch": 0.98, "learning_rate": 1.0346837656659867e-06, "loss": 0.0, "step": 50400 }, { "epoch": 0.98, "learning_rate": 9.375303604391334e-07, "loss": 0.0, "step": 50500 }, { "epoch": 0.98, "learning_rate": 8.403769552122803e-07, "loss": 0.0, "step": 50600 }, { "epoch": 0.99, "learning_rate": 7.43223549985427e-07, "loss": 0.0, "step": 50700 }, { "epoch": 0.99, "learning_rate": 6.460701447585738e-07, "loss": 0.0, "step": 50800 }, { "epoch": 0.99, "learning_rate": 5.489167395317206e-07, "loss": 0.0, "step": 50900 }, { "epoch": 0.99, "learning_rate": 4.517633343048674e-07, "loss": 0.0, "step": 51000 }, { "epoch": 0.99, "learning_rate": 3.546099290780142e-07, "loss": 0.0, "step": 51100 }, { "epoch": 0.99, "learning_rate": 2.57456523851161e-07, "loss": 0.0, "step": 51200 }, { "epoch": 1.0, "learning_rate": 1.6030311862430779e-07, "loss": 0.0, "step": 51300 }, { "epoch": 1.0, "learning_rate": 6.314971339745459e-08, "loss": 0.0, "step": 51400 } ], "max_steps": 51465, "num_train_epochs": 1, "total_flos": 8.545904720294707e+16, "trial_name": null, "trial_params": null }