|
{ |
|
"best_metric": 0.587454617023468, |
|
"best_model_checkpoint": "models/mnli_xnli_shuff_all/checkpoint-184017", |
|
"epoch": 1.0, |
|
"global_step": 184017, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9945657194715707e-05, |
|
"loss": 0.796, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9891314389431413e-05, |
|
"loss": 0.7802, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9836971584147118e-05, |
|
"loss": 0.7726, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9782628778862823e-05, |
|
"loss": 0.7543, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.972828597357853e-05, |
|
"loss": 0.7571, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9673943168294238e-05, |
|
"loss": 0.7342, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.961960036300994e-05, |
|
"loss": 0.7401, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.956525755772565e-05, |
|
"loss": 0.7427, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.951091475244135e-05, |
|
"loss": 0.7412, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9456571947157056e-05, |
|
"loss": 0.7342, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9402229141872765e-05, |
|
"loss": 0.7286, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9347886336588467e-05, |
|
"loss": 0.731, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9293543531304176e-05, |
|
"loss": 0.7345, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.923920072601988e-05, |
|
"loss": 0.7233, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9184857920735587e-05, |
|
"loss": 0.7177, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9130515115451292e-05, |
|
"loss": 0.7239, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9076172310166994e-05, |
|
"loss": 0.714, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9021829504882703e-05, |
|
"loss": 0.7209, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.896748669959841e-05, |
|
"loss": 0.7191, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8913143894314114e-05, |
|
"loss": 0.7079, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.885880108902982e-05, |
|
"loss": 0.7131, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8804458283745525e-05, |
|
"loss": 0.7133, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.875011547846123e-05, |
|
"loss": 0.702, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8695772673176936e-05, |
|
"loss": 0.6985, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.864142986789264e-05, |
|
"loss": 0.7004, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8587087062608347e-05, |
|
"loss": 0.7062, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8532744257324052e-05, |
|
"loss": 0.6984, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8478401452039758e-05, |
|
"loss": 0.693, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8424058646755467e-05, |
|
"loss": 0.697, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.836971584147117e-05, |
|
"loss": 0.703, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8315373036186878e-05, |
|
"loss": 0.6923, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.826103023090258e-05, |
|
"loss": 0.6837, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8206687425618285e-05, |
|
"loss": 0.7035, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8152344620333994e-05, |
|
"loss": 0.687, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8098001815049696e-05, |
|
"loss": 0.6872, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8043659009765405e-05, |
|
"loss": 0.6864, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.798931620448111e-05, |
|
"loss": 0.6825, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7934973399196816e-05, |
|
"loss": 0.6864, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.788063059391252e-05, |
|
"loss": 0.6834, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7826287788628227e-05, |
|
"loss": 0.6794, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7771944983343932e-05, |
|
"loss": 0.676, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7717602178059638e-05, |
|
"loss": 0.6906, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7663259372775343e-05, |
|
"loss": 0.6764, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.760891656749105e-05, |
|
"loss": 0.6764, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7554573762206754e-05, |
|
"loss": 0.6833, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.750023095692246e-05, |
|
"loss": 0.6712, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7445888151638165e-05, |
|
"loss": 0.6811, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.739154534635387e-05, |
|
"loss": 0.6707, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7337202541069576e-05, |
|
"loss": 0.6749, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.728285973578528e-05, |
|
"loss": 0.6683, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7228516930500987e-05, |
|
"loss": 0.6689, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7174174125216695e-05, |
|
"loss": 0.6736, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7119831319932398e-05, |
|
"loss": 0.6724, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.7065488514648106e-05, |
|
"loss": 0.6737, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.701114570936381e-05, |
|
"loss": 0.6631, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6956802904079517e-05, |
|
"loss": 0.6861, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6902460098795223e-05, |
|
"loss": 0.6749, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6848117293510925e-05, |
|
"loss": 0.6619, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6793774488226634e-05, |
|
"loss": 0.662, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.673943168294234e-05, |
|
"loss": 0.6522, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6685088877658045e-05, |
|
"loss": 0.671, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.663074607237375e-05, |
|
"loss": 0.6617, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6576403267089455e-05, |
|
"loss": 0.658, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.652206046180516e-05, |
|
"loss": 0.6588, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6467717656520866e-05, |
|
"loss": 0.6615, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6413374851236572e-05, |
|
"loss": 0.6628, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6359032045952277e-05, |
|
"loss": 0.6513, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6304689240667983e-05, |
|
"loss": 0.6586, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6250346435383688e-05, |
|
"loss": 0.6491, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6196003630099394e-05, |
|
"loss": 0.6708, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.61416608248151e-05, |
|
"loss": 0.6565, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6087318019530808e-05, |
|
"loss": 0.6525, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.603297521424651e-05, |
|
"loss": 0.6503, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.5978632408962215e-05, |
|
"loss": 0.6465, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.5924289603677924e-05, |
|
"loss": 0.6477, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5869946798393626e-05, |
|
"loss": 0.6473, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5815603993109335e-05, |
|
"loss": 0.6494, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5761261187825037e-05, |
|
"loss": 0.6371, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5706918382540746e-05, |
|
"loss": 0.6434, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.565257557725645e-05, |
|
"loss": 0.645, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5598232771972157e-05, |
|
"loss": 0.65, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5543889966687863e-05, |
|
"loss": 0.6432, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5489547161403568e-05, |
|
"loss": 0.6437, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5435204356119273e-05, |
|
"loss": 0.6297, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.538086155083498e-05, |
|
"loss": 0.6376, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5326518745550684e-05, |
|
"loss": 0.6439, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.527217594026639e-05, |
|
"loss": 0.6435, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5217833134982097e-05, |
|
"loss": 0.6474, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.51634903296978e-05, |
|
"loss": 0.6356, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5109147524413506e-05, |
|
"loss": 0.6386, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5054804719129212e-05, |
|
"loss": 0.6349, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5000461913844917e-05, |
|
"loss": 0.6333, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.4946119108560624e-05, |
|
"loss": 0.6439, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4891776303276328e-05, |
|
"loss": 0.6334, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4837433497992035e-05, |
|
"loss": 0.6384, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.478309069270774e-05, |
|
"loss": 0.6481, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4728747887423446e-05, |
|
"loss": 0.6308, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4674405082139151e-05, |
|
"loss": 0.6403, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4620062276854857e-05, |
|
"loss": 0.6347, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4565719471570562e-05, |
|
"loss": 0.6394, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4511376666286268e-05, |
|
"loss": 0.6266, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4457033861001975e-05, |
|
"loss": 0.6298, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4402691055717679e-05, |
|
"loss": 0.6187, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4348348250433386e-05, |
|
"loss": 0.6278, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4294005445149091e-05, |
|
"loss": 0.6331, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4239662639864795e-05, |
|
"loss": 0.6359, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4185319834580502e-05, |
|
"loss": 0.6297, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4130977029296206e-05, |
|
"loss": 0.6307, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4076634224011913e-05, |
|
"loss": 0.6297, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4022291418727619e-05, |
|
"loss": 0.6318, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.3967948613443326e-05, |
|
"loss": 0.6346, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.391360580815903e-05, |
|
"loss": 0.6179, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3859263002874737e-05, |
|
"loss": 0.6215, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.380492019759044e-05, |
|
"loss": 0.6269, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3750577392306146e-05, |
|
"loss": 0.6204, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3696234587021853e-05, |
|
"loss": 0.6297, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3641891781737557e-05, |
|
"loss": 0.6206, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3587548976453264e-05, |
|
"loss": 0.6251, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.353320617116897e-05, |
|
"loss": 0.6268, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3478863365884675e-05, |
|
"loss": 0.6182, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.342452056060038e-05, |
|
"loss": 0.6201, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3370177755316086e-05, |
|
"loss": 0.6138, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3315834950031791e-05, |
|
"loss": 0.6241, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3261492144747497e-05, |
|
"loss": 0.6134, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3207149339463204e-05, |
|
"loss": 0.6235, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3152806534178908e-05, |
|
"loss": 0.6065, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3098463728894615e-05, |
|
"loss": 0.6088, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.304412092361032e-05, |
|
"loss": 0.612, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.2989778118326026e-05, |
|
"loss": 0.6185, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.2935435313041731e-05, |
|
"loss": 0.6032, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2881092507757435e-05, |
|
"loss": 0.6124, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2826749702473142e-05, |
|
"loss": 0.6094, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2772406897188848e-05, |
|
"loss": 0.6005, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2718064091904555e-05, |
|
"loss": 0.6132, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2663721286620258e-05, |
|
"loss": 0.6124, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2609378481335966e-05, |
|
"loss": 0.6142, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.255503567605167e-05, |
|
"loss": 0.6104, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2500692870767375e-05, |
|
"loss": 0.6183, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2446350065483082e-05, |
|
"loss": 0.607, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2392007260198786e-05, |
|
"loss": 0.5969, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2337664454914493e-05, |
|
"loss": 0.6052, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2283321649630198e-05, |
|
"loss": 0.613, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2228978844345904e-05, |
|
"loss": 0.5975, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.217463603906161e-05, |
|
"loss": 0.5998, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2120293233777316e-05, |
|
"loss": 0.5949, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.206595042849302e-05, |
|
"loss": 0.6029, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2011607623208726e-05, |
|
"loss": 0.6074, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.1957264817924433e-05, |
|
"loss": 0.5985, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.1902922012640136e-05, |
|
"loss": 0.6105, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1848579207355844e-05, |
|
"loss": 0.6064, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1794236402071549e-05, |
|
"loss": 0.5912, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1739893596787255e-05, |
|
"loss": 0.6117, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.168555079150296e-05, |
|
"loss": 0.5947, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1631207986218664e-05, |
|
"loss": 0.5985, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1576865180934371e-05, |
|
"loss": 0.6108, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1522522375650076e-05, |
|
"loss": 0.5972, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1468179570365784e-05, |
|
"loss": 0.6002, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1413836765081487e-05, |
|
"loss": 0.5921, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1359493959797194e-05, |
|
"loss": 0.6026, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1305151154512898e-05, |
|
"loss": 0.603, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1250808349228605e-05, |
|
"loss": 0.5963, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.119646554394431e-05, |
|
"loss": 0.5942, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1142122738660015e-05, |
|
"loss": 0.6006, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1087779933375722e-05, |
|
"loss": 0.6026, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1033437128091427e-05, |
|
"loss": 0.5944, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.0979094322807133e-05, |
|
"loss": 0.6031, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.0924751517522838e-05, |
|
"loss": 0.5906, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0870408712238545e-05, |
|
"loss": 0.595, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0816065906954249e-05, |
|
"loss": 0.5921, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0761723101669956e-05, |
|
"loss": 0.5944, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0707380296385662e-05, |
|
"loss": 0.5752, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0653037491101365e-05, |
|
"loss": 0.5942, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0598694685817072e-05, |
|
"loss": 0.5946, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0544351880532778e-05, |
|
"loss": 0.5911, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0490009075248483e-05, |
|
"loss": 0.5982, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0435666269964189e-05, |
|
"loss": 0.5941, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0381323464679896e-05, |
|
"loss": 0.598, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.03269806593956e-05, |
|
"loss": 0.5845, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0272637854111305e-05, |
|
"loss": 0.5958, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0218295048827012e-05, |
|
"loss": 0.584, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0163952243542716e-05, |
|
"loss": 0.5774, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0109609438258423e-05, |
|
"loss": 0.5809, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0055266632974127e-05, |
|
"loss": 0.5882, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0000923827689834e-05, |
|
"loss": 0.5948, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.94658102240554e-06, |
|
"loss": 0.5809, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.892238217121245e-06, |
|
"loss": 0.5816, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.83789541183695e-06, |
|
"loss": 0.592, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.783552606552656e-06, |
|
"loss": 0.5822, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.729209801268361e-06, |
|
"loss": 0.5809, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.674866995984069e-06, |
|
"loss": 0.5736, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.620524190699772e-06, |
|
"loss": 0.5837, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.566181385415478e-06, |
|
"loss": 0.5812, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.511838580131183e-06, |
|
"loss": 0.5769, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.45749577484689e-06, |
|
"loss": 0.5784, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.403152969562596e-06, |
|
"loss": 0.5872, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.348810164278301e-06, |
|
"loss": 0.5807, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.294467358994007e-06, |
|
"loss": 0.5857, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.240124553709712e-06, |
|
"loss": 0.5706, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.185781748425418e-06, |
|
"loss": 0.5816, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.131438943141123e-06, |
|
"loss": 0.5888, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.077096137856829e-06, |
|
"loss": 0.5799, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.022753332572534e-06, |
|
"loss": 0.5825, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.968410527288241e-06, |
|
"loss": 0.5783, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.914067722003947e-06, |
|
"loss": 0.5749, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.859724916719652e-06, |
|
"loss": 0.5817, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.805382111435358e-06, |
|
"loss": 0.582, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.751039306151063e-06, |
|
"loss": 0.5687, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.696696500866769e-06, |
|
"loss": 0.5702, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.642353695582474e-06, |
|
"loss": 0.5734, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.58801089029818e-06, |
|
"loss": 0.5769, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.533668085013885e-06, |
|
"loss": 0.5648, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.47932527972959e-06, |
|
"loss": 0.5686, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.424982474445297e-06, |
|
"loss": 0.572, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.370639669161003e-06, |
|
"loss": 0.5753, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.316296863876707e-06, |
|
"loss": 0.5745, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.261954058592412e-06, |
|
"loss": 0.5713, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.20761125330812e-06, |
|
"loss": 0.5732, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.153268448023825e-06, |
|
"loss": 0.5623, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.09892564273953e-06, |
|
"loss": 0.5679, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.044582837455236e-06, |
|
"loss": 0.5711, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.990240032170941e-06, |
|
"loss": 0.5773, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.935897226886648e-06, |
|
"loss": 0.5788, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.881554421602352e-06, |
|
"loss": 0.5626, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.827211616318057e-06, |
|
"loss": 0.5688, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.772868811033763e-06, |
|
"loss": 0.5576, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.71852600574947e-06, |
|
"loss": 0.5659, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.664183200465176e-06, |
|
"loss": 0.5523, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.609840395180881e-06, |
|
"loss": 0.5633, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.5554975898965864e-06, |
|
"loss": 0.5677, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.501154784612292e-06, |
|
"loss": 0.5604, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.4468119793279965e-06, |
|
"loss": 0.562, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.392469174043703e-06, |
|
"loss": 0.5699, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.338126368759408e-06, |
|
"loss": 0.5636, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.2837835634751146e-06, |
|
"loss": 0.5534, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.22944075819082e-06, |
|
"loss": 0.563, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.1750979529065255e-06, |
|
"loss": 0.5629, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.120755147622232e-06, |
|
"loss": 0.5682, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.066412342337937e-06, |
|
"loss": 0.5704, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.012069537053643e-06, |
|
"loss": 0.5582, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.957726731769347e-06, |
|
"loss": 0.5696, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.903383926485054e-06, |
|
"loss": 0.5649, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.849041121200759e-06, |
|
"loss": 0.5649, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.7946983159164645e-06, |
|
"loss": 0.5575, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.740355510632171e-06, |
|
"loss": 0.5649, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.686012705347876e-06, |
|
"loss": 0.5571, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.631669900063582e-06, |
|
"loss": 0.5639, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.577327094779288e-06, |
|
"loss": 0.5666, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.522984289494993e-06, |
|
"loss": 0.5539, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.468641484210698e-06, |
|
"loss": 0.5486, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.4142986789264035e-06, |
|
"loss": 0.5669, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.35995587364211e-06, |
|
"loss": 0.5627, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.305613068357815e-06, |
|
"loss": 0.554, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.251270263073521e-06, |
|
"loss": 0.5619, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.196927457789227e-06, |
|
"loss": 0.5525, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.1425846525049325e-06, |
|
"loss": 0.5609, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.088241847220637e-06, |
|
"loss": 0.5538, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.0338990419363434e-06, |
|
"loss": 0.555, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5.979556236652049e-06, |
|
"loss": 0.5472, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5.925213431367754e-06, |
|
"loss": 0.5593, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.870870626083461e-06, |
|
"loss": 0.5555, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.816527820799166e-06, |
|
"loss": 0.5561, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.7621850155148716e-06, |
|
"loss": 0.5585, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.707842210230578e-06, |
|
"loss": 0.5643, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.6534994049462825e-06, |
|
"loss": 0.548, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.599156599661988e-06, |
|
"loss": 0.5565, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.544813794377693e-06, |
|
"loss": 0.5441, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.4904709890934e-06, |
|
"loss": 0.5526, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.436128183809105e-06, |
|
"loss": 0.5505, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.381785378524811e-06, |
|
"loss": 0.5547, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.327442573240517e-06, |
|
"loss": 0.5512, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.273099767956222e-06, |
|
"loss": 0.5522, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.218756962671927e-06, |
|
"loss": 0.55, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.164414157387632e-06, |
|
"loss": 0.5566, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.110071352103339e-06, |
|
"loss": 0.5552, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.055728546819044e-06, |
|
"loss": 0.5499, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.00138574153475e-06, |
|
"loss": 0.5597, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.947042936250456e-06, |
|
"loss": 0.5447, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.8927001309661605e-06, |
|
"loss": 0.5513, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.838357325681867e-06, |
|
"loss": 0.5435, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.784014520397572e-06, |
|
"loss": 0.5485, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.729671715113278e-06, |
|
"loss": 0.5427, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.675328909828983e-06, |
|
"loss": 0.5565, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.6209861045446895e-06, |
|
"loss": 0.5499, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.566643299260395e-06, |
|
"loss": 0.5421, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.5123004939761e-06, |
|
"loss": 0.54, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.457957688691806e-06, |
|
"loss": 0.5435, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.403614883407511e-06, |
|
"loss": 0.5484, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.349272078123218e-06, |
|
"loss": 0.5478, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.294929272838923e-06, |
|
"loss": 0.5519, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.2405864675546285e-06, |
|
"loss": 0.5531, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.186243662270334e-06, |
|
"loss": 0.5411, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.1319008569860394e-06, |
|
"loss": 0.5424, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.077558051701746e-06, |
|
"loss": 0.5506, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.02321524641745e-06, |
|
"loss": 0.5465, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.968872441133157e-06, |
|
"loss": 0.5448, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.914529635848862e-06, |
|
"loss": 0.5347, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.8601868305645676e-06, |
|
"loss": 0.5429, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.805844025280273e-06, |
|
"loss": 0.5401, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.751501219995979e-06, |
|
"loss": 0.5428, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6971584147116848e-06, |
|
"loss": 0.5414, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6428156094273902e-06, |
|
"loss": 0.541, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.5884728041430957e-06, |
|
"loss": 0.5396, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.534129998858801e-06, |
|
"loss": 0.5398, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.479787193574507e-06, |
|
"loss": 0.5393, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.425444388290213e-06, |
|
"loss": 0.5378, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.371101583005918e-06, |
|
"loss": 0.5344, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.316758777721624e-06, |
|
"loss": 0.5418, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.2624159724373293e-06, |
|
"loss": 0.5266, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.208073167153035e-06, |
|
"loss": 0.5376, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.15373036186874e-06, |
|
"loss": 0.5471, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.099387556584446e-06, |
|
"loss": 0.5451, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.045044751300152e-06, |
|
"loss": 0.5451, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.990701946015858e-06, |
|
"loss": 0.5423, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.9363591407315633e-06, |
|
"loss": 0.5332, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8820163354472687e-06, |
|
"loss": 0.5367, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.827673530162974e-06, |
|
"loss": 0.5366, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.77333072487868e-06, |
|
"loss": 0.5362, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.718987919594386e-06, |
|
"loss": 0.5386, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.664645114310091e-06, |
|
"loss": 0.5381, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.610302309025797e-06, |
|
"loss": 0.5415, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5559595037415023e-06, |
|
"loss": 0.5308, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.501616698457208e-06, |
|
"loss": 0.5298, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.4472738931729136e-06, |
|
"loss": 0.5209, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.392931087888619e-06, |
|
"loss": 0.5374, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.338588282604325e-06, |
|
"loss": 0.5437, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.2842454773200304e-06, |
|
"loss": 0.5353, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.2299026720357363e-06, |
|
"loss": 0.5377, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.1755598667514418e-06, |
|
"loss": 0.5412, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.121217061467147e-06, |
|
"loss": 0.5236, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.0668742561828527e-06, |
|
"loss": 0.5369, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.0125314508985585e-06, |
|
"loss": 0.529, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.958188645614264e-06, |
|
"loss": 0.5338, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.9038458403299699e-06, |
|
"loss": 0.53, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.8495030350456753e-06, |
|
"loss": 0.5254, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.795160229761381e-06, |
|
"loss": 0.5382, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.7408174244770865e-06, |
|
"loss": 0.541, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6864746191927921e-06, |
|
"loss": 0.5296, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6321318139084976e-06, |
|
"loss": 0.5304, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5777890086242035e-06, |
|
"loss": 0.5253, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.523446203339909e-06, |
|
"loss": 0.5189, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.4691033980556146e-06, |
|
"loss": 0.5359, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.41476059277132e-06, |
|
"loss": 0.5368, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.360417787487026e-06, |
|
"loss": 0.5265, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3060749822027314e-06, |
|
"loss": 0.5289, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.251732176918437e-06, |
|
"loss": 0.5294, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1973893716341425e-06, |
|
"loss": 0.5293, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1430465663498482e-06, |
|
"loss": 0.5342, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0887037610655538e-06, |
|
"loss": 0.5182, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0343609557812593e-06, |
|
"loss": 0.5311, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.80018150496965e-07, |
|
"loss": 0.5386, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.256753452126706e-07, |
|
"loss": 0.5291, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.713325399283762e-07, |
|
"loss": 0.5233, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.169897346440818e-07, |
|
"loss": 0.5167, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.626469293597875e-07, |
|
"loss": 0.5339, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.083041240754932e-07, |
|
"loss": 0.5263, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.539613187911987e-07, |
|
"loss": 0.5255, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.996185135069043e-07, |
|
"loss": 0.5292, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.452757082226099e-07, |
|
"loss": 0.5247, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.909329029383155e-07, |
|
"loss": 0.5293, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.3659009765402114e-07, |
|
"loss": 0.5278, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8224729236972675e-07, |
|
"loss": 0.5151, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.279044870854323e-07, |
|
"loss": 0.515, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.73561681801138e-07, |
|
"loss": 0.5305, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.192188765168436e-07, |
|
"loss": 0.5231, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.6487607123254918e-07, |
|
"loss": 0.526, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.1053326594825478e-07, |
|
"loss": 0.5243, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.6190460663960404e-08, |
|
"loss": 0.5192, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.8476553796660092e-09, |
|
"loss": 0.5343, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7746184738955824, |
|
"eval_loss": 0.587454617023468, |
|
"eval_runtime": 72.5338, |
|
"eval_samples_per_second": 514.932, |
|
"eval_steps_per_second": 64.37, |
|
"step": 184017 |
|
} |
|
], |
|
"max_steps": 184017, |
|
"num_train_epochs": 1, |
|
"total_flos": 3.8733781342346496e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|