|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"global_step": 862850, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.112668990819203e-05, |
|
"loss": 8.0034, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.533733258325905e-05, |
|
"loss": 7.5343, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7778615089460934e-05, |
|
"loss": 7.2281, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9509713131658946e-05, |
|
"loss": 6.5225, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-05, |
|
"loss": 6.0666, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-05, |
|
"loss": 5.7596, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 5.5413, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 5.3661, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 5.2017, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 5.052, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.9005, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.7597, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.6439, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.5544, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.4742, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 4.4085, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 4.3437, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 4.2957, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 4.2381, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 4.1846, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 4.1399, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 4.1097, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 4.0513, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 4.0177, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 3.9807, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 3.9417, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 3.9119, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 3.8789, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 3.8467, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 3.8162, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 3.7815, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 3.755, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 3.7321, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 3.7082, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6796, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6481, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6342, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5971, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5824, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5602, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5299, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5166, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.494, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4759, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4558, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4323, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 3.415, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 3.396, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3864, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3646, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3441, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3341, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3227, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3056, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.289, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2696, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.258, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2438, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.223, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2181, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2049, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1886, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 3.183, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1623, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1553, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1407, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1316, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1209, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1047, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0984, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0901, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0713, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0674, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0568, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0448, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0282, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0217, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0215, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0051, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9998, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9937, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9874, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9712, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9656, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9621, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9509, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9423, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9436, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9297, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9205, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9103, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9019, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8987, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8913, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8863, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8781, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8695, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8731, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8595, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8548, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8471, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.84, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8328, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8317, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8278, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.821, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8145, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8083, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7986, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7942, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7862, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.782, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7773, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7767, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.771, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7649, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7632, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.755, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7415, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7435, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7405, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7348, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7296, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7244, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7208, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7157, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7143, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7108, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7009, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7023, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6931, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6901, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6871, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6843, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6842, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6771, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6708, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6638, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6617, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6626, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.655, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6576, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6408, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6422, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6416, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6411, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6327, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6334, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6208, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6232, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5274870522655163, |
|
"eval_loss": 2.509765625, |
|
"eval_runtime": 4883.386, |
|
"eval_samples_per_second": 114.367, |
|
"eval_steps_per_second": 0.894, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6229, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6185, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6103, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6089, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6157, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6045, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6045, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5938, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5924, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5943, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.589, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5906, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.575, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5799, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.576, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.574, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5697, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5699, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5694, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5629, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.563, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5618, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.551, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5494, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.544, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5483, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5458, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5463, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5353, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5317, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5352, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5295, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5286, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5196, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5222, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5245, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5154, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.509, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5131, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5072, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5079, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5062, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5008, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.504, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5013, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4979, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4919, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4892, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4897, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4897, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4859, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4766, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4815, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4797, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4782, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.477, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4714, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4693, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4647, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4675, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4628, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.458, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4575, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4588, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4541, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4556, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4419, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4505, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4469, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4488, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4493, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4399, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4408, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4413, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4375, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4306, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4354, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.426, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4307, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4297, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4268, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4249, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4211, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4233, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.415, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4163, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4185, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4148, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4089, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.411, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4061, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4056, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.402, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4042, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4025, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3981, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4005, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3938, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3914, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3901, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3912, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3881, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3904, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3907, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3827, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3832, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3824, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3831, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.377, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3732, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3753, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3747, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3684, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3716, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3681, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3694, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3622, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3646, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.367, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.365, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3608, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3558, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3627, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3541, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3541, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3516, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.354, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3494, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3529, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3493, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3519, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3442, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3431, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3489, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3446, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3451, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3307, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3336, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3383, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3376, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3336, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3351, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3296, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3295, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3274, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3268, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3289, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3293, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3212, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3189, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.5684444981680046, |
|
"eval_loss": 2.2109375, |
|
"eval_runtime": 4880.4554, |
|
"eval_samples_per_second": 114.436, |
|
"eval_steps_per_second": 0.894, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3193, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.317, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3212, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.317, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3183, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3178, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3085, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3127, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3149, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3091, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3088, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3097, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3131, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3031, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.302, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3056, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3046, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2997, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2933, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2992, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2947, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2933, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.295, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2935, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2926, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2887, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2966, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2924, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2897, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2903, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.282, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.281, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2903, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2831, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2874, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2722, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2823, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2793, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2839, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2808, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2782, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2751, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2728, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2736, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2761, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2692, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.269, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2704, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2673, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2657, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2731, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2677, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2628, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2665, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2649, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.266, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2626, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2605, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2598, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2541, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2527, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2495, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2552, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2575, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2532, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2531, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2512, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2536, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.25, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2494, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2454, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2416, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2417, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2418, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2467, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2409, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2451, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2397, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2392, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2388, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2369, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2354, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2367, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2383, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2352, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.236, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2327, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2345, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2361, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2316, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2315, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2315, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.227, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2307, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2276, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.219, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.222, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2262, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2224, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.217, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2236, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2256, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2181, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2209, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2246, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.223, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2164, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2105, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2181, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2212, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2138, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.208, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.217, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2128, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2139, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.206, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.213, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2072, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2115, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2064, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2061, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2079, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2088, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2069, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2037, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2074, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2036, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.202, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2022, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2031, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1998, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.202, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1916, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1934, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1958, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1933, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1945, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1889, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1929, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1935, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1883, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1882, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1835, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1862, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1869, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1906, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1825, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1902, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1843, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1811, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_accuracy": 0.5877310925035356, |
|
"eval_loss": 2.078125, |
|
"eval_runtime": 4896.1088, |
|
"eval_samples_per_second": 114.07, |
|
"eval_steps_per_second": 0.891, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1848, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1788, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.185, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1827, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1772, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1792, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1799, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1831, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1763, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1773, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1804, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1711, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1788, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.174, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1708, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1772, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.174, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1663, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1737, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1696, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1726, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1681, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.174, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1679, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.17, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1664, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1657, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1664, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1674, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1616, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1597, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.161, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1626, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1643, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1597, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1575, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1551, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1561, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1524, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1631, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1553, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1595, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1556, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1572, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1586, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.161, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.152, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1611, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1484, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1516, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1504, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1493, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1494, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1507, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1555, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1522, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1477, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1452, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1516, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1486, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1481, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1461, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1393, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1419, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1376, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1486, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1425, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.147, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1458, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1393, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.141, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1375, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.143, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1395, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1376, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1365, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1378, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1361, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1397, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1319, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1341, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1307, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1273, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1317, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1268, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1342, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1372, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1311, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1301, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1283, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1264, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1296, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1277, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1296, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1264, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1244, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1227, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1258, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1238, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1293, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1263, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.117, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1237, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1251, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1243, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1187, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1179, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1226, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1182, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1159, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1186, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.116, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1152, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1195, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1144, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.125, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1127, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1177, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1148, |
|
"step": 569000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1185, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.115, |
|
"step": 571000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1127, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1075, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.111, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1139, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1092, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1126, |
|
"step": 577000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.112, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1093, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1008, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1119, |
|
"step": 581000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1105, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1078, |
|
"step": 583000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1109, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1093, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1047, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1038, |
|
"step": 587000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1074, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.105, |
|
"step": 589000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1114, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1002, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0985, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1004, |
|
"step": 593000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1008, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0989, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.102, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1039, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0943, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1032, |
|
"step": 599000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1048, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.6001050830453463, |
|
"eval_loss": 1.994140625, |
|
"eval_runtime": 4891.4227, |
|
"eval_samples_per_second": 114.179, |
|
"eval_steps_per_second": 0.892, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0963, |
|
"step": 601000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1025, |
|
"step": 602000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0995, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1012, |
|
"step": 604000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0948, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0975, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0955, |
|
"step": 607000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0978, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0966, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0921, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.092, |
|
"step": 611000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0944, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.095, |
|
"step": 613000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0939, |
|
"step": 614000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0952, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0928, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0932, |
|
"step": 617000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0881, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0912, |
|
"step": 619000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0909, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0856, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0906, |
|
"step": 622000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0905, |
|
"step": 623000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0824, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0874, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0894, |
|
"step": 626000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0824, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0874, |
|
"step": 628000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.088, |
|
"step": 629000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.088, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0815, |
|
"step": 631000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0835, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0886, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0842, |
|
"step": 634000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0814, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0822, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0811, |
|
"step": 637000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0883, |
|
"step": 638000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0858, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0841, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0893, |
|
"step": 641000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0807, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0841, |
|
"step": 643000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0803, |
|
"step": 644000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.086, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0816, |
|
"step": 646000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.083, |
|
"step": 647000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0751, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0836, |
|
"step": 649000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0757, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0795, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0755, |
|
"step": 652000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0811, |
|
"step": 653000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0769, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0737, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0803, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0739, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0783, |
|
"step": 658000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0744, |
|
"step": 659000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0696, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0785, |
|
"step": 661000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0742, |
|
"step": 662000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0715, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0705, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0698, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0703, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.075, |
|
"step": 667000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0694, |
|
"step": 668000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0708, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0656, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.069, |
|
"step": 671000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0724, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0699, |
|
"step": 673000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0695, |
|
"step": 674000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.068, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0671, |
|
"step": 676000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0716, |
|
"step": 677000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0689, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.062, |
|
"step": 679000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0733, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0671, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0676, |
|
"step": 682000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0651, |
|
"step": 683000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0594, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0658, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0686, |
|
"step": 686000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0685, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0632, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0662, |
|
"step": 689000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0615, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0546, |
|
"step": 691000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0593, |
|
"step": 692000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0599, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0635, |
|
"step": 694000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0621, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0654, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0576, |
|
"step": 697000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0563, |
|
"step": 698000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.059, |
|
"step": 699000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0616, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0642, |
|
"step": 701000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.052, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0569, |
|
"step": 703000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0617, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0613, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0593, |
|
"step": 706000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0582, |
|
"step": 707000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0587, |
|
"step": 708000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0518, |
|
"step": 709000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0585, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.052, |
|
"step": 711000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.056, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0559, |
|
"step": 713000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0538, |
|
"step": 714000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0566, |
|
"step": 715000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0519, |
|
"step": 716000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.058, |
|
"step": 717000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0627, |
|
"step": 718000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0476, |
|
"step": 719000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0484, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0545, |
|
"step": 721000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0558, |
|
"step": 722000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0547, |
|
"step": 723000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0501, |
|
"step": 724000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0562, |
|
"step": 725000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0467, |
|
"step": 726000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0438, |
|
"step": 727000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0458, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0504, |
|
"step": 729000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0501, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0498, |
|
"step": 731000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0495, |
|
"step": 732000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0494, |
|
"step": 733000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0469, |
|
"step": 734000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0503, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0492, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0445, |
|
"step": 737000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.044, |
|
"step": 738000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0418, |
|
"step": 739000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0473, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0503, |
|
"step": 741000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0513, |
|
"step": 742000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0492, |
|
"step": 743000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0471, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0357, |
|
"step": 745000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.045, |
|
"step": 746000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.042, |
|
"step": 747000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0485, |
|
"step": 748000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.043, |
|
"step": 749000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0345, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.6086964451170365, |
|
"eval_loss": 1.935546875, |
|
"eval_runtime": 4905.4193, |
|
"eval_samples_per_second": 113.853, |
|
"eval_steps_per_second": 0.89, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0407, |
|
"step": 751000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.042, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0446, |
|
"step": 753000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0447, |
|
"step": 754000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0402, |
|
"step": 755000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0392, |
|
"step": 756000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.037, |
|
"step": 757000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0341, |
|
"step": 758000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0381, |
|
"step": 759000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0374, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0347, |
|
"step": 761000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0345, |
|
"step": 762000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.036, |
|
"step": 763000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0357, |
|
"step": 764000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0386, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0363, |
|
"step": 766000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0406, |
|
"step": 767000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0332, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0358, |
|
"step": 769000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0359, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0429, |
|
"step": 771000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0402, |
|
"step": 772000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0372, |
|
"step": 773000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0343, |
|
"step": 774000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0351, |
|
"step": 775000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0366, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0364, |
|
"step": 777000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.034, |
|
"step": 778000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0313, |
|
"step": 779000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0281, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0316, |
|
"step": 781000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0298, |
|
"step": 782000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0292, |
|
"step": 783000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0269, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0309, |
|
"step": 785000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0316, |
|
"step": 786000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0379, |
|
"step": 787000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0298, |
|
"step": 788000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.028, |
|
"step": 789000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0278, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0316, |
|
"step": 791000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0269, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.031, |
|
"step": 793000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0281, |
|
"step": 794000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0237, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0244, |
|
"step": 796000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0262, |
|
"step": 797000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0275, |
|
"step": 798000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0253, |
|
"step": 799000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0336, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.029, |
|
"step": 801000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0271, |
|
"step": 802000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0221, |
|
"step": 803000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0287, |
|
"step": 804000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0264, |
|
"step": 805000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0263, |
|
"step": 806000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0242, |
|
"step": 807000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0221, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0242, |
|
"step": 809000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0212, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0213, |
|
"step": 811000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0182, |
|
"step": 812000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0195, |
|
"step": 813000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0245, |
|
"step": 814000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.018, |
|
"step": 815000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0219, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0221, |
|
"step": 817000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.019, |
|
"step": 818000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0199, |
|
"step": 819000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0166, |
|
"step": 820000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0192, |
|
"step": 821000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0217, |
|
"step": 822000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0179, |
|
"step": 823000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0242, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0158, |
|
"step": 825000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.013, |
|
"step": 826000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0196, |
|
"step": 827000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.02, |
|
"step": 828000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.021, |
|
"step": 829000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0192, |
|
"step": 830000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0191, |
|
"step": 831000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0184, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0196, |
|
"step": 833000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0183, |
|
"step": 834000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0134, |
|
"step": 835000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0163, |
|
"step": 836000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0113, |
|
"step": 837000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0206, |
|
"step": 838000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0166, |
|
"step": 839000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0181, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0168, |
|
"step": 841000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.01, |
|
"step": 842000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0153, |
|
"step": 843000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0084, |
|
"step": 844000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0169, |
|
"step": 845000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0142, |
|
"step": 846000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0132, |
|
"step": 847000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0126, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0121, |
|
"step": 849000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0122, |
|
"step": 850000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0149, |
|
"step": 851000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0125, |
|
"step": 852000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0074, |
|
"step": 853000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0136, |
|
"step": 854000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0118, |
|
"step": 855000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0139, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0158, |
|
"step": 857000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0105, |
|
"step": 858000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0075, |
|
"step": 859000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.007, |
|
"step": 860000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0143, |
|
"step": 861000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0067, |
|
"step": 862000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 862850, |
|
"total_flos": 1.5713271682523202e+19, |
|
"train_loss": 0.02994945877374942, |
|
"train_runtime": 24111.6962, |
|
"train_samples_per_second": 9161.097, |
|
"train_steps_per_second": 35.786 |
|
} |
|
], |
|
"max_steps": 862850, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.5713271682523202e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|