|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"global_step": 1964054, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.0500635531186734e-05, |
|
"loss": 7.9258, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.4647181356188376e-05, |
|
"loss": 6.7206, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.705525479232612e-05, |
|
"loss": 6.2705, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.875604750285333e-05, |
|
"loss": 5.9721, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-05, |
|
"loss": 5.7209, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-05, |
|
"loss": 5.5184, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-05, |
|
"loss": 5.3619, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 5.2279, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 5.1094, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 5.0125, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 4.9219, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 4.8491, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.7722, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.7121, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.6485, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.582, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 4.5365, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 4.4739, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 4.4262, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 4.3785, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 4.3253, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-05, |
|
"loss": 4.2774, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 4.2332, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 4.1947, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 4.1613, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 4.1186, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-05, |
|
"loss": 4.0885, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 4.0514, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 4.019, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 3.984, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5e-05, |
|
"loss": 3.9533, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 3.9193, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 3.9009, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 3.8693, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 3.8376, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-05, |
|
"loss": 3.8107, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 3.7897, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 3.7741, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 3.7481, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 3.726, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6989, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6819, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6614, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6406, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6174, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5e-05, |
|
"loss": 3.6004, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5884, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5653, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.554, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5352, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5185, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5032, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4902, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4716, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4541, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4419, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4297, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4116, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 3.4023, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3892, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3774, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.368, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3504, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3382, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3264, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3244, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3115, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.3021, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.287, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.287, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2652, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2519, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2475, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 3.239, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2336, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2193, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2119, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2065, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1932, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1863, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1714, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1721, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1595, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1564, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1508, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5e-05, |
|
"loss": 3.131, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1199, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1231, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1172, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1107, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1006, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0946, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0934, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0837, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 3.074, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0712, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0704, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0613, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 3.046, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0416, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0417, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0353, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0309, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0247, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0213, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0103, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9965, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9967, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9975, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9872, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.982, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9821, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9704, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9698, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9595, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9603, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9484, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9532, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9456, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9333, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9323, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9328, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9312, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9226, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9158, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9064, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9094, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8974, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8964, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8948, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8862, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8835, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8863, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8821, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8781, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8695, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8673, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8605, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.865, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8576, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8584, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8458, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8427, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8444, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8391, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8311, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8212, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8259, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.827, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.824, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5210951936592287, |
|
"eval_loss": 2.658203125, |
|
"eval_runtime": 4374.358, |
|
"eval_samples_per_second": 145.413, |
|
"eval_steps_per_second": 1.136, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8211, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8055, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8112, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8067, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8039, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8047, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7934, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.793, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7861, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7845, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7863, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7825, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7833, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7791, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7702, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.77, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7637, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7632, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7622, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7558, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7611, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7467, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7524, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.743, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7393, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7372, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7393, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7414, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7389, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7251, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7306, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7324, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.723, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7187, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7174, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7184, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7167, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7103, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7066, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7023, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7059, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6976, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6931, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6967, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6932, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6876, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6884, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6927, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6813, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6867, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6853, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6862, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6763, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6702, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6732, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6713, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6722, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6585, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6609, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6594, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.652, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6545, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6552, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6618, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6521, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.642, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6433, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6456, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6445, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6466, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6393, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6348, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6382, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6366, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6332, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.631, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6286, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6269, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6244, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.629, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6219, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6185, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6206, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.618, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6179, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6116, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6078, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6169, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6091, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6065, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6078, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6051, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5986, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6005, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5998, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6003, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5952, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5907, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5934, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5948, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5879, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5868, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5885, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5854, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5853, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5803, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5759, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5729, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5737, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5786, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5704, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.571, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5674, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5704, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5651, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.566, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5664, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5699, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5628, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5494, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5604, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5563, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5598, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.551, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5519, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5543, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5533, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.552, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.549, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5509, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5544, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5473, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.543, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5403, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5488, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5361, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5385, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5422, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5363, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5344, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5315, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5345, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.532, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5338, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5299, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5191, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5273, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5255, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5229, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5179, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.559970362665269, |
|
"eval_loss": 2.365234375, |
|
"eval_runtime": 4356.0676, |
|
"eval_samples_per_second": 146.024, |
|
"eval_steps_per_second": 1.141, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5194, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5175, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5112, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5167, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5064, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5107, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5186, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5124, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5068, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5067, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5117, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5057, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5043, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5088, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5001, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5047, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.502, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5013, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4957, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5014, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4969, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4977, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4984, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4955, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4938, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4898, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4916, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4895, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.488, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4883, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4857, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4836, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4799, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4858, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4765, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4771, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4756, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4804, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.472, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4687, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4754, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.472, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4764, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4749, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4738, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4738, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4718, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4636, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4647, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4698, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4628, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4639, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4678, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4631, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4603, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4574, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4646, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4569, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4532, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.454, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4575, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4595, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4521, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4497, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4498, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4557, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4539, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4554, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4493, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4545, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4434, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4434, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4466, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4404, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4474, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.448, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.438, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4293, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4345, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4353, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4443, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4342, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4366, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4333, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4332, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4417, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4337, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4338, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4343, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4296, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4279, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4267, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4282, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.423, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4308, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4268, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4241, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4235, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4261, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4277, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4179, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4187, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.422, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4162, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4152, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4172, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4204, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4138, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4152, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4157, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4187, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4146, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4143, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4064, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4121, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4184, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4115, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4108, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4053, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4071, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4116, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4053, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4033, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4029, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.399, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3942, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4024, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4029, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3979, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4071, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4013, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3988, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3961, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.397, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3924, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3951, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.394, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.392, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3939, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3875, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3946, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3964, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3913, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3847, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3848, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3913, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3868, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3861, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3807, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3852, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5776840726507144, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 4352.3033, |
|
"eval_samples_per_second": 146.15, |
|
"eval_steps_per_second": 1.142, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3857, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.385, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3847, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3855, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3818, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3804, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3855, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3847, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3857, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3729, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.377, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3784, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.374, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.377, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3847, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3745, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3809, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.378, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3776, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3706, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3707, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3761, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3766, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3704, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3698, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.361, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.37, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3639, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3656, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3704, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3593, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3684, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.363, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3646, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3638, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3579, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3678, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3608, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3639, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.359, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3676, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.38, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3867, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3887, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3917, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4049, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3964, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4028, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4004, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4035, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.397, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3992, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4069, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4047, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4067, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4047, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.406, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4089, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4097, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4079, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4092, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4071, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4099, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4075, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4157, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4169, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4146, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4118, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4097, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4072, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4095, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.411, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4136, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4114, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4107, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4148, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4154, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4142, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4168, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4195, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4123, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4143, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4077, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4118, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4117, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4128, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4087, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4105, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4088, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4101, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4183, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4111, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4158, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4133, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4061, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4076, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4027, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4116, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4056, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4049, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4156, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4069, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.413, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4145, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4054, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4105, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.411, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4102, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4046, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.418, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4127, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4092, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4132, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4044, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4066, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.408, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4036, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4081, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4057, |
|
"step": 569000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4054, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4042, |
|
"step": 571000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4097, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.402, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.406, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4076, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4037, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4065, |
|
"step": 577000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4057, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4074, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4093, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4106, |
|
"step": 581000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4098, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4111, |
|
"step": 583000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.407, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4061, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4093, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4131, |
|
"step": 587000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4063, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3983, |
|
"step": 589000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3993, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4097, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4116, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4072, |
|
"step": 593000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4063, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4001, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4022, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4016, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4054, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4033, |
|
"step": 599000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4014, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.5762903057161931, |
|
"eval_loss": 2.248046875, |
|
"eval_runtime": 4358.0709, |
|
"eval_samples_per_second": 145.957, |
|
"eval_steps_per_second": 1.14, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4013, |
|
"step": 601000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4027, |
|
"step": 602000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4046, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4089, |
|
"step": 604000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.402, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4032, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3969, |
|
"step": 607000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3968, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3976, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.405, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.396, |
|
"step": 611000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4035, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4045, |
|
"step": 613000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4028, |
|
"step": 614000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4025, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4006, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4004, |
|
"step": 617000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3981, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4053, |
|
"step": 619000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3995, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3994, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3977, |
|
"step": 622000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4009, |
|
"step": 623000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4015, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3978, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3966, |
|
"step": 626000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3992, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3999, |
|
"step": 628000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3962, |
|
"step": 629000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3965, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3983, |
|
"step": 631000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3979, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3962, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3931, |
|
"step": 634000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3965, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3975, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3923, |
|
"step": 637000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3994, |
|
"step": 638000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3969, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3968, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3943, |
|
"step": 641000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3919, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3971, |
|
"step": 643000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3995, |
|
"step": 644000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3909, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4019, |
|
"step": 646000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3957, |
|
"step": 647000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3901, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3967, |
|
"step": 649000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3922, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3917, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3872, |
|
"step": 652000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3969, |
|
"step": 653000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3913, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3949, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3943, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3876, |
|
"step": 658000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3951, |
|
"step": 659000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.393, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3904, |
|
"step": 661000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3896, |
|
"step": 662000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3896, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3906, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3882, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3878, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.385, |
|
"step": 667000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3857, |
|
"step": 668000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3902, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3943, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3948, |
|
"step": 671000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3866, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3854, |
|
"step": 673000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3835, |
|
"step": 674000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.387, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3926, |
|
"step": 676000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3857, |
|
"step": 677000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3924, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3847, |
|
"step": 679000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3876, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3831, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3858, |
|
"step": 682000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3895, |
|
"step": 683000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3885, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3884, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.385, |
|
"step": 686000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.384, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3894, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.385, |
|
"step": 689000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3805, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3866, |
|
"step": 691000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3893, |
|
"step": 692000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3863, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.385, |
|
"step": 694000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3872, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3819, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3841, |
|
"step": 697000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.383, |
|
"step": 698000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3881, |
|
"step": 699000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3789, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3898, |
|
"step": 701000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.387, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3809, |
|
"step": 703000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.381, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3913, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3808, |
|
"step": 706000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.373, |
|
"step": 707000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3858, |
|
"step": 708000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3818, |
|
"step": 709000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3791, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3801, |
|
"step": 711000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3827, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3809, |
|
"step": 713000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3788, |
|
"step": 714000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3793, |
|
"step": 715000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3795, |
|
"step": 716000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3744, |
|
"step": 717000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.383, |
|
"step": 718000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3708, |
|
"step": 719000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3771, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3825, |
|
"step": 721000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3769, |
|
"step": 722000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3809, |
|
"step": 723000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3814, |
|
"step": 724000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3737, |
|
"step": 725000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3754, |
|
"step": 726000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3774, |
|
"step": 727000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.38, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3769, |
|
"step": 729000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3783, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3766, |
|
"step": 731000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3771, |
|
"step": 732000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3799, |
|
"step": 733000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3794, |
|
"step": 734000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3684, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3789, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3809, |
|
"step": 737000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3753, |
|
"step": 738000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3762, |
|
"step": 739000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3747, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3634, |
|
"step": 741000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3763, |
|
"step": 742000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3777, |
|
"step": 743000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3716, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3731, |
|
"step": 745000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3762, |
|
"step": 746000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3692, |
|
"step": 747000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3725, |
|
"step": 748000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3744, |
|
"step": 749000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3687, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.5800586539733796, |
|
"eval_loss": 2.21484375, |
|
"eval_runtime": 4527.6466, |
|
"eval_samples_per_second": 140.49, |
|
"eval_steps_per_second": 2.195, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3717, |
|
"step": 751000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3724, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3708, |
|
"step": 753000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3687, |
|
"step": 754000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.369, |
|
"step": 755000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3635, |
|
"step": 756000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3656, |
|
"step": 757000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3692, |
|
"step": 758000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3686, |
|
"step": 759000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3694, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3721, |
|
"step": 761000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3678, |
|
"step": 762000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3692, |
|
"step": 763000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3711, |
|
"step": 764000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3741, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3693, |
|
"step": 766000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3775, |
|
"step": 767000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3654, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3756, |
|
"step": 769000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.363, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.374, |
|
"step": 771000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3705, |
|
"step": 772000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3719, |
|
"step": 773000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3705, |
|
"step": 774000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3702, |
|
"step": 775000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3671, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3707, |
|
"step": 777000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.372, |
|
"step": 778000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3781, |
|
"step": 779000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3755, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3614, |
|
"step": 781000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3564, |
|
"step": 782000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3594, |
|
"step": 783000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3725, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3764, |
|
"step": 785000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3659, |
|
"step": 786000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3611, |
|
"step": 787000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.363, |
|
"step": 788000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3702, |
|
"step": 789000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3609, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3677, |
|
"step": 791000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3635, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3654, |
|
"step": 793000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3711, |
|
"step": 794000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3589, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3601, |
|
"step": 796000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.363, |
|
"step": 797000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3693, |
|
"step": 798000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3613, |
|
"step": 799000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.363, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3636, |
|
"step": 801000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3649, |
|
"step": 802000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3687, |
|
"step": 803000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3635, |
|
"step": 804000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3605, |
|
"step": 805000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3718, |
|
"step": 806000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3472, |
|
"step": 807000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3656, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3652, |
|
"step": 809000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3637, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3647, |
|
"step": 811000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3579, |
|
"step": 812000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3628, |
|
"step": 813000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3593, |
|
"step": 814000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3517, |
|
"step": 815000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3665, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3578, |
|
"step": 817000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3612, |
|
"step": 818000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3625, |
|
"step": 819000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3614, |
|
"step": 820000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3685, |
|
"step": 821000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3642, |
|
"step": 822000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3645, |
|
"step": 823000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3541, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3575, |
|
"step": 825000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3511, |
|
"step": 826000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3555, |
|
"step": 827000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3575, |
|
"step": 828000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3585, |
|
"step": 829000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3639, |
|
"step": 830000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3656, |
|
"step": 831000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3595, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.359, |
|
"step": 833000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3586, |
|
"step": 834000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3516, |
|
"step": 835000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3609, |
|
"step": 836000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.351, |
|
"step": 837000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3596, |
|
"step": 838000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3511, |
|
"step": 839000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3579, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3591, |
|
"step": 841000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3498, |
|
"step": 842000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3606, |
|
"step": 843000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3504, |
|
"step": 844000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3442, |
|
"step": 845000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3553, |
|
"step": 846000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.351, |
|
"step": 847000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3511, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3573, |
|
"step": 849000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3529, |
|
"step": 850000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.352, |
|
"step": 851000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3507, |
|
"step": 852000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3595, |
|
"step": 853000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.353, |
|
"step": 854000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3444, |
|
"step": 855000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3568, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3547, |
|
"step": 857000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.362, |
|
"step": 858000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3635, |
|
"step": 859000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3548, |
|
"step": 860000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3522, |
|
"step": 861000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.355, |
|
"step": 862000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3583, |
|
"step": 863000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3446, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3536, |
|
"step": 865000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3507, |
|
"step": 866000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3493, |
|
"step": 867000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3568, |
|
"step": 868000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3498, |
|
"step": 869000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3537, |
|
"step": 870000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3622, |
|
"step": 871000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3551, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.353, |
|
"step": 873000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.358, |
|
"step": 874000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.359, |
|
"step": 875000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3573, |
|
"step": 876000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3601, |
|
"step": 877000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3546, |
|
"step": 878000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3502, |
|
"step": 879000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3465, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3556, |
|
"step": 881000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3415, |
|
"step": 882000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3538, |
|
"step": 883000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.356, |
|
"step": 884000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3469, |
|
"step": 885000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3536, |
|
"step": 886000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3509, |
|
"step": 887000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3441, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.344, |
|
"step": 889000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3485, |
|
"step": 890000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3453, |
|
"step": 891000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3492, |
|
"step": 892000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3476, |
|
"step": 893000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3411, |
|
"step": 894000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3497, |
|
"step": 895000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3413, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3442, |
|
"step": 897000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3428, |
|
"step": 898000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3419, |
|
"step": 899000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3458, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5835522342641007, |
|
"eval_loss": 2.19140625, |
|
"eval_runtime": 4531.41, |
|
"eval_samples_per_second": 140.373, |
|
"eval_steps_per_second": 2.193, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3477, |
|
"step": 901000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.345, |
|
"step": 902000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3438, |
|
"step": 903000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3572, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3532, |
|
"step": 905000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3493, |
|
"step": 906000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3419, |
|
"step": 907000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3499, |
|
"step": 908000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3388, |
|
"step": 909000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.341, |
|
"step": 910000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3441, |
|
"step": 911000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3413, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3448, |
|
"step": 913000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3382, |
|
"step": 914000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3463, |
|
"step": 915000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3373, |
|
"step": 916000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3429, |
|
"step": 917000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3547, |
|
"step": 918000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3407, |
|
"step": 919000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3362, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.341, |
|
"step": 921000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3326, |
|
"step": 922000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3431, |
|
"step": 923000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3392, |
|
"step": 924000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3372, |
|
"step": 925000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3488, |
|
"step": 926000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3382, |
|
"step": 927000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3445, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3491, |
|
"step": 929000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3487, |
|
"step": 930000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3448, |
|
"step": 931000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3407, |
|
"step": 932000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.339, |
|
"step": 933000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3402, |
|
"step": 934000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3441, |
|
"step": 935000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3357, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3493, |
|
"step": 937000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3436, |
|
"step": 938000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3453, |
|
"step": 939000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3427, |
|
"step": 940000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3394, |
|
"step": 941000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3384, |
|
"step": 942000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3375, |
|
"step": 943000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3411, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3375, |
|
"step": 945000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.33, |
|
"step": 946000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3397, |
|
"step": 947000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3365, |
|
"step": 948000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3365, |
|
"step": 949000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3355, |
|
"step": 950000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3296, |
|
"step": 951000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3364, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3384, |
|
"step": 953000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3443, |
|
"step": 954000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3459, |
|
"step": 955000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3342, |
|
"step": 956000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3411, |
|
"step": 957000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3334, |
|
"step": 958000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3428, |
|
"step": 959000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3408, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3349, |
|
"step": 961000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3406, |
|
"step": 962000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3305, |
|
"step": 963000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3381, |
|
"step": 964000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3392, |
|
"step": 965000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3371, |
|
"step": 966000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3259, |
|
"step": 967000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3422, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3402, |
|
"step": 969000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3354, |
|
"step": 970000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3368, |
|
"step": 971000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3369, |
|
"step": 972000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.33, |
|
"step": 973000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3311, |
|
"step": 974000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3476, |
|
"step": 975000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3384, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.331, |
|
"step": 977000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.331, |
|
"step": 978000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3305, |
|
"step": 979000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.341, |
|
"step": 980000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3358, |
|
"step": 981000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3383, |
|
"step": 982000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3546, |
|
"step": 983000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3664, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3786, |
|
"step": 985000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3733, |
|
"step": 986000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.385, |
|
"step": 987000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3884, |
|
"step": 988000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3883, |
|
"step": 989000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3809, |
|
"step": 990000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3811, |
|
"step": 991000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.395, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3891, |
|
"step": 993000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3968, |
|
"step": 994000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4015, |
|
"step": 995000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3996, |
|
"step": 996000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4019, |
|
"step": 997000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3913, |
|
"step": 998000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4018, |
|
"step": 999000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4003, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4019, |
|
"step": 1001000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4067, |
|
"step": 1002000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4113, |
|
"step": 1003000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.407, |
|
"step": 1004000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3967, |
|
"step": 1005000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4192, |
|
"step": 1006000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4173, |
|
"step": 1007000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4183, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4283, |
|
"step": 1009000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4155, |
|
"step": 1010000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4143, |
|
"step": 1011000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4163, |
|
"step": 1012000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4094, |
|
"step": 1013000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4105, |
|
"step": 1014000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4184, |
|
"step": 1015000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4119, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4216, |
|
"step": 1017000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4201, |
|
"step": 1018000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.414, |
|
"step": 1019000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4188, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4212, |
|
"step": 1021000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4234, |
|
"step": 1022000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4205, |
|
"step": 1023000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4175, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4264, |
|
"step": 1025000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4271, |
|
"step": 1026000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4244, |
|
"step": 1027000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4271, |
|
"step": 1028000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4229, |
|
"step": 1029000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4247, |
|
"step": 1030000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4273, |
|
"step": 1031000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4278, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4232, |
|
"step": 1033000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4277, |
|
"step": 1034000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4329, |
|
"step": 1035000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4349, |
|
"step": 1036000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4195, |
|
"step": 1037000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4233, |
|
"step": 1038000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4275, |
|
"step": 1039000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4348, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4397, |
|
"step": 1041000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4249, |
|
"step": 1042000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4282, |
|
"step": 1043000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4254, |
|
"step": 1044000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4235, |
|
"step": 1045000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4315, |
|
"step": 1046000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.426, |
|
"step": 1047000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4344, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4269, |
|
"step": 1049000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4338, |
|
"step": 1050000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.5744690190170737, |
|
"eval_loss": 2.263671875, |
|
"eval_runtime": 4537.1601, |
|
"eval_samples_per_second": 140.195, |
|
"eval_steps_per_second": 2.191, |
|
"step": 1050000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.433, |
|
"step": 1051000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4197, |
|
"step": 1052000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4293, |
|
"step": 1053000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4331, |
|
"step": 1054000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4348, |
|
"step": 1055000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4413, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4263, |
|
"step": 1057000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4423, |
|
"step": 1058000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4382, |
|
"step": 1059000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4396, |
|
"step": 1060000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4401, |
|
"step": 1061000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4363, |
|
"step": 1062000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4345, |
|
"step": 1063000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4344, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4401, |
|
"step": 1065000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4341, |
|
"step": 1066000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4318, |
|
"step": 1067000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4309, |
|
"step": 1068000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4464, |
|
"step": 1069000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4363, |
|
"step": 1070000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4425, |
|
"step": 1071000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4393, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4332, |
|
"step": 1073000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4361, |
|
"step": 1074000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4362, |
|
"step": 1075000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4318, |
|
"step": 1076000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4382, |
|
"step": 1077000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4291, |
|
"step": 1078000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.445, |
|
"step": 1079000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4356, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4381, |
|
"step": 1081000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.442, |
|
"step": 1082000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4386, |
|
"step": 1083000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4339, |
|
"step": 1084000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4461, |
|
"step": 1085000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4401, |
|
"step": 1086000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4379, |
|
"step": 1087000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4378, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4359, |
|
"step": 1089000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4364, |
|
"step": 1090000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4416, |
|
"step": 1091000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4464, |
|
"step": 1092000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4335, |
|
"step": 1093000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4319, |
|
"step": 1094000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4364, |
|
"step": 1095000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4337, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.436, |
|
"step": 1097000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4354, |
|
"step": 1098000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4483, |
|
"step": 1099000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4407, |
|
"step": 1100000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4465, |
|
"step": 1101000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4479, |
|
"step": 1102000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4437, |
|
"step": 1103000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4388, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4465, |
|
"step": 1105000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.448, |
|
"step": 1106000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4379, |
|
"step": 1107000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.445, |
|
"step": 1108000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4388, |
|
"step": 1109000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.436, |
|
"step": 1110000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4404, |
|
"step": 1111000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4369, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4421, |
|
"step": 1113000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4451, |
|
"step": 1114000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4462, |
|
"step": 1115000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.44, |
|
"step": 1116000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4405, |
|
"step": 1117000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4447, |
|
"step": 1118000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4421, |
|
"step": 1119000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4524, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4429, |
|
"step": 1121000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4386, |
|
"step": 1122000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4475, |
|
"step": 1123000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5e-05, |
|
"loss": 2.445, |
|
"step": 1124000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4517, |
|
"step": 1125000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.446, |
|
"step": 1126000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4447, |
|
"step": 1127000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4461, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4457, |
|
"step": 1129000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4386, |
|
"step": 1130000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4454, |
|
"step": 1131000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.454, |
|
"step": 1132000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4392, |
|
"step": 1133000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4389, |
|
"step": 1134000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4478, |
|
"step": 1135000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4402, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4387, |
|
"step": 1137000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4421, |
|
"step": 1138000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4397, |
|
"step": 1139000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4474, |
|
"step": 1140000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4502, |
|
"step": 1141000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4471, |
|
"step": 1142000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4368, |
|
"step": 1143000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4472, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4434, |
|
"step": 1145000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4479, |
|
"step": 1146000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4504, |
|
"step": 1147000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4485, |
|
"step": 1148000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4407, |
|
"step": 1149000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4487, |
|
"step": 1150000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4456, |
|
"step": 1151000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4463, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4359, |
|
"step": 1153000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4479, |
|
"step": 1154000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4425, |
|
"step": 1155000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4485, |
|
"step": 1156000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4507, |
|
"step": 1157000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.449, |
|
"step": 1158000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4403, |
|
"step": 1159000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4432, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4453, |
|
"step": 1161000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4561, |
|
"step": 1162000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4474, |
|
"step": 1163000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4475, |
|
"step": 1164000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4445, |
|
"step": 1165000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4483, |
|
"step": 1166000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4439, |
|
"step": 1167000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4436, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4485, |
|
"step": 1169000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.451, |
|
"step": 1170000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4502, |
|
"step": 1171000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4446, |
|
"step": 1172000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4529, |
|
"step": 1173000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.45, |
|
"step": 1174000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4436, |
|
"step": 1175000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4536, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4422, |
|
"step": 1177000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4537, |
|
"step": 1178000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.446, |
|
"step": 1179000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4463, |
|
"step": 1180000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4491, |
|
"step": 1181000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4455, |
|
"step": 1182000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4466, |
|
"step": 1183000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4589, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4432, |
|
"step": 1185000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4566, |
|
"step": 1186000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4535, |
|
"step": 1187000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4558, |
|
"step": 1188000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4543, |
|
"step": 1189000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4473, |
|
"step": 1190000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4506, |
|
"step": 1191000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4444, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4341, |
|
"step": 1193000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4428, |
|
"step": 1194000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4514, |
|
"step": 1195000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4514, |
|
"step": 1196000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4538, |
|
"step": 1197000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4422, |
|
"step": 1198000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4455, |
|
"step": 1199000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.448, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.5731948419940126, |
|
"eval_loss": 2.279296875, |
|
"eval_runtime": 4533.1358, |
|
"eval_samples_per_second": 140.32, |
|
"eval_steps_per_second": 2.193, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4536, |
|
"step": 1201000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4473, |
|
"step": 1202000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4483, |
|
"step": 1203000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4516, |
|
"step": 1204000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4508, |
|
"step": 1205000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4462, |
|
"step": 1206000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4443, |
|
"step": 1207000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4477, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4552, |
|
"step": 1209000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4441, |
|
"step": 1210000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4366, |
|
"step": 1211000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4502, |
|
"step": 1212000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4434, |
|
"step": 1213000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4413, |
|
"step": 1214000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4485, |
|
"step": 1215000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4448, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4434, |
|
"step": 1217000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4505, |
|
"step": 1218000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4532, |
|
"step": 1219000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4433, |
|
"step": 1220000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.444, |
|
"step": 1221000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4528, |
|
"step": 1222000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4537, |
|
"step": 1223000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4518, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.453, |
|
"step": 1225000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4527, |
|
"step": 1226000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.45, |
|
"step": 1227000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4515, |
|
"step": 1228000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.451, |
|
"step": 1229000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4475, |
|
"step": 1230000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4525, |
|
"step": 1231000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4522, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4496, |
|
"step": 1233000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4548, |
|
"step": 1234000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4486, |
|
"step": 1235000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4499, |
|
"step": 1236000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4609, |
|
"step": 1237000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4543, |
|
"step": 1238000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4587, |
|
"step": 1239000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4502, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4547, |
|
"step": 1241000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4427, |
|
"step": 1242000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4481, |
|
"step": 1243000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4528, |
|
"step": 1244000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4512, |
|
"step": 1245000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4537, |
|
"step": 1246000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4622, |
|
"step": 1247000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4572, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4423, |
|
"step": 1249000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4502, |
|
"step": 1250000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4491, |
|
"step": 1251000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4474, |
|
"step": 1252000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4418, |
|
"step": 1253000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4471, |
|
"step": 1254000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4493, |
|
"step": 1255000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4531, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4544, |
|
"step": 1257000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4502, |
|
"step": 1258000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4457, |
|
"step": 1259000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4434, |
|
"step": 1260000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4529, |
|
"step": 1261000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4481, |
|
"step": 1262000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4427, |
|
"step": 1263000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4508, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4528, |
|
"step": 1265000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4481, |
|
"step": 1266000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4456, |
|
"step": 1267000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4483, |
|
"step": 1268000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4458, |
|
"step": 1269000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4463, |
|
"step": 1270000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4517, |
|
"step": 1271000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4485, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4547, |
|
"step": 1273000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4532, |
|
"step": 1274000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4449, |
|
"step": 1275000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4499, |
|
"step": 1276000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4534, |
|
"step": 1277000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4556, |
|
"step": 1278000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.451, |
|
"step": 1279000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4432, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4562, |
|
"step": 1281000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4352, |
|
"step": 1282000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4479, |
|
"step": 1283000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4497, |
|
"step": 1284000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4546, |
|
"step": 1285000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4498, |
|
"step": 1286000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4524, |
|
"step": 1287000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4556, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4564, |
|
"step": 1289000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4509, |
|
"step": 1290000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4478, |
|
"step": 1291000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4542, |
|
"step": 1292000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4508, |
|
"step": 1293000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4499, |
|
"step": 1294000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.443, |
|
"step": 1295000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4505, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4512, |
|
"step": 1297000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.455, |
|
"step": 1298000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4506, |
|
"step": 1299000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4508, |
|
"step": 1300000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4427, |
|
"step": 1301000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4479, |
|
"step": 1302000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4501, |
|
"step": 1303000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4427, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4547, |
|
"step": 1305000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4496, |
|
"step": 1306000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4526, |
|
"step": 1307000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4464, |
|
"step": 1308000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4545, |
|
"step": 1309000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4591, |
|
"step": 1310000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 2.448, |
|
"step": 1311000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4522, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4539, |
|
"step": 1313000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4439, |
|
"step": 1314000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4455, |
|
"step": 1315000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4492, |
|
"step": 1316000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4419, |
|
"step": 1317000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4524, |
|
"step": 1318000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4448, |
|
"step": 1319000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4492, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4325, |
|
"step": 1321000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4132, |
|
"step": 1322000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4056, |
|
"step": 1323000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4033, |
|
"step": 1324000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3908, |
|
"step": 1325000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3877, |
|
"step": 1326000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3788, |
|
"step": 1327000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3885, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3796, |
|
"step": 1329000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3758, |
|
"step": 1330000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3784, |
|
"step": 1331000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3744, |
|
"step": 1332000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3714, |
|
"step": 1333000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.37, |
|
"step": 1334000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3703, |
|
"step": 1335000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3699, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3762, |
|
"step": 1337000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3686, |
|
"step": 1338000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.367, |
|
"step": 1339000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3674, |
|
"step": 1340000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3608, |
|
"step": 1341000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3576, |
|
"step": 1342000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3544, |
|
"step": 1343000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3598, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3538, |
|
"step": 1345000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3541, |
|
"step": 1346000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3516, |
|
"step": 1347000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3544, |
|
"step": 1348000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3611, |
|
"step": 1349000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3495, |
|
"step": 1350000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_accuracy": 0.5847132515714882, |
|
"eval_loss": 2.189453125, |
|
"eval_runtime": 4583.3705, |
|
"eval_samples_per_second": 138.782, |
|
"eval_steps_per_second": 2.168, |
|
"step": 1350000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3507, |
|
"step": 1351000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3554, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.353, |
|
"step": 1353000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3447, |
|
"step": 1354000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3387, |
|
"step": 1355000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3462, |
|
"step": 1356000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3518, |
|
"step": 1357000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3423, |
|
"step": 1358000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.344, |
|
"step": 1359000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3389, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3394, |
|
"step": 1361000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3378, |
|
"step": 1362000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3493, |
|
"step": 1363000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3445, |
|
"step": 1364000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3373, |
|
"step": 1365000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3373, |
|
"step": 1366000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3432, |
|
"step": 1367000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3381, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3416, |
|
"step": 1369000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3427, |
|
"step": 1370000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3409, |
|
"step": 1371000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3388, |
|
"step": 1372000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3325, |
|
"step": 1373000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3417, |
|
"step": 1374000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3278, |
|
"step": 1375000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.34, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3412, |
|
"step": 1377000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3264, |
|
"step": 1378000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3228, |
|
"step": 1379000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3236, |
|
"step": 1380000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3253, |
|
"step": 1381000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3373, |
|
"step": 1382000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3336, |
|
"step": 1383000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3261, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3319, |
|
"step": 1385000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3242, |
|
"step": 1386000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3269, |
|
"step": 1387000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3278, |
|
"step": 1388000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3316, |
|
"step": 1389000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3355, |
|
"step": 1390000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3319, |
|
"step": 1391000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3293, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3288, |
|
"step": 1393000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3274, |
|
"step": 1394000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3314, |
|
"step": 1395000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3186, |
|
"step": 1396000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3227, |
|
"step": 1397000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.322, |
|
"step": 1398000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3138, |
|
"step": 1399000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3208, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3294, |
|
"step": 1401000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3162, |
|
"step": 1402000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3337, |
|
"step": 1403000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3245, |
|
"step": 1404000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3247, |
|
"step": 1405000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3232, |
|
"step": 1406000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3227, |
|
"step": 1407000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3232, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3197, |
|
"step": 1409000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3266, |
|
"step": 1410000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3167, |
|
"step": 1411000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3262, |
|
"step": 1412000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3147, |
|
"step": 1413000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3207, |
|
"step": 1414000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3192, |
|
"step": 1415000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3305, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3246, |
|
"step": 1417000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3256, |
|
"step": 1418000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3168, |
|
"step": 1419000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3142, |
|
"step": 1420000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3165, |
|
"step": 1421000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3179, |
|
"step": 1422000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3056, |
|
"step": 1423000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3195, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3149, |
|
"step": 1425000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3111, |
|
"step": 1426000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3164, |
|
"step": 1427000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3161, |
|
"step": 1428000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3187, |
|
"step": 1429000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3071, |
|
"step": 1430000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3068, |
|
"step": 1431000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.315, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3121, |
|
"step": 1433000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3156, |
|
"step": 1434000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3147, |
|
"step": 1435000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3135, |
|
"step": 1436000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.321, |
|
"step": 1437000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3172, |
|
"step": 1438000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3135, |
|
"step": 1439000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3141, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3186, |
|
"step": 1441000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3118, |
|
"step": 1442000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.315, |
|
"step": 1443000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3212, |
|
"step": 1444000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3052, |
|
"step": 1445000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3075, |
|
"step": 1446000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3156, |
|
"step": 1447000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3035, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3119, |
|
"step": 1449000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3113, |
|
"step": 1450000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3092, |
|
"step": 1451000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3066, |
|
"step": 1452000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2963, |
|
"step": 1453000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3148, |
|
"step": 1454000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3098, |
|
"step": 1455000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3118, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3153, |
|
"step": 1457000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3116, |
|
"step": 1458000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3158, |
|
"step": 1459000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.312, |
|
"step": 1460000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3139, |
|
"step": 1461000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3185, |
|
"step": 1462000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3024, |
|
"step": 1463000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3127, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3058, |
|
"step": 1465000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.317, |
|
"step": 1466000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2993, |
|
"step": 1467000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5e-05, |
|
"loss": 2.312, |
|
"step": 1468000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3146, |
|
"step": 1469000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3048, |
|
"step": 1470000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3039, |
|
"step": 1471000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3132, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3042, |
|
"step": 1473000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3068, |
|
"step": 1474000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3051, |
|
"step": 1475000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2953, |
|
"step": 1476000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3114, |
|
"step": 1477000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3066, |
|
"step": 1478000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3054, |
|
"step": 1479000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3057, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2906, |
|
"step": 1481000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3048, |
|
"step": 1482000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3021, |
|
"step": 1483000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2999, |
|
"step": 1484000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3112, |
|
"step": 1485000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3138, |
|
"step": 1486000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3054, |
|
"step": 1487000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3021, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3084, |
|
"step": 1489000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2948, |
|
"step": 1490000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2951, |
|
"step": 1491000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3102, |
|
"step": 1492000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3027, |
|
"step": 1493000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2981, |
|
"step": 1494000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3074, |
|
"step": 1495000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2985, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3069, |
|
"step": 1497000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2977, |
|
"step": 1498000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3008, |
|
"step": 1499000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2959, |
|
"step": 1500000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.5908897665779291, |
|
"eval_loss": 2.142578125, |
|
"eval_runtime": 4555.5415, |
|
"eval_samples_per_second": 139.63, |
|
"eval_steps_per_second": 2.182, |
|
"step": 1500000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.287, |
|
"step": 1501000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3021, |
|
"step": 1502000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2996, |
|
"step": 1503000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2913, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3057, |
|
"step": 1505000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2943, |
|
"step": 1506000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3042, |
|
"step": 1507000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2943, |
|
"step": 1508000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2872, |
|
"step": 1509000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3081, |
|
"step": 1510000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3018, |
|
"step": 1511000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2979, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.295, |
|
"step": 1513000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2926, |
|
"step": 1514000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2908, |
|
"step": 1515000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3023, |
|
"step": 1516000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5e-05, |
|
"loss": 2.299, |
|
"step": 1517000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2902, |
|
"step": 1518000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3034, |
|
"step": 1519000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2973, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2937, |
|
"step": 1521000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.296, |
|
"step": 1522000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2924, |
|
"step": 1523000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2941, |
|
"step": 1524000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2903, |
|
"step": 1525000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.299, |
|
"step": 1526000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2967, |
|
"step": 1527000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3052, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2998, |
|
"step": 1529000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2943, |
|
"step": 1530000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2938, |
|
"step": 1531000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2934, |
|
"step": 1532000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2964, |
|
"step": 1533000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2961, |
|
"step": 1534000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3046, |
|
"step": 1535000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2901, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2947, |
|
"step": 1537000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2914, |
|
"step": 1538000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2977, |
|
"step": 1539000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2945, |
|
"step": 1540000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.302, |
|
"step": 1541000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3021, |
|
"step": 1542000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2884, |
|
"step": 1543000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2904, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2868, |
|
"step": 1545000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2917, |
|
"step": 1546000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2833, |
|
"step": 1547000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2923, |
|
"step": 1548000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2978, |
|
"step": 1549000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2921, |
|
"step": 1550000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2945, |
|
"step": 1551000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2815, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2857, |
|
"step": 1553000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.289, |
|
"step": 1554000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2891, |
|
"step": 1555000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5e-05, |
|
"loss": 2.288, |
|
"step": 1556000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2973, |
|
"step": 1557000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2944, |
|
"step": 1558000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2885, |
|
"step": 1559000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2876, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3017, |
|
"step": 1561000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2916, |
|
"step": 1562000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2881, |
|
"step": 1563000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.28, |
|
"step": 1564000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2944, |
|
"step": 1565000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2932, |
|
"step": 1566000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2861, |
|
"step": 1567000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2937, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2806, |
|
"step": 1569000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2875, |
|
"step": 1570000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2932, |
|
"step": 1571000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2768, |
|
"step": 1572000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2988, |
|
"step": 1573000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2871, |
|
"step": 1574000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2954, |
|
"step": 1575000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2979, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2966, |
|
"step": 1577000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2931, |
|
"step": 1578000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2885, |
|
"step": 1579000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2843, |
|
"step": 1580000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2924, |
|
"step": 1581000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2885, |
|
"step": 1582000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.283, |
|
"step": 1583000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2894, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2874, |
|
"step": 1585000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2918, |
|
"step": 1586000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.291, |
|
"step": 1587000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2843, |
|
"step": 1588000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2826, |
|
"step": 1589000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2907, |
|
"step": 1590000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.288, |
|
"step": 1591000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2805, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2888, |
|
"step": 1593000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2864, |
|
"step": 1594000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2861, |
|
"step": 1595000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2837, |
|
"step": 1596000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2849, |
|
"step": 1597000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2866, |
|
"step": 1598000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2842, |
|
"step": 1599000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2848, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2813, |
|
"step": 1601000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.288, |
|
"step": 1602000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.282, |
|
"step": 1603000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2938, |
|
"step": 1604000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2887, |
|
"step": 1605000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2803, |
|
"step": 1606000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2835, |
|
"step": 1607000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2786, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2757, |
|
"step": 1609000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2791, |
|
"step": 1610000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.287, |
|
"step": 1611000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2795, |
|
"step": 1612000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2832, |
|
"step": 1613000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2894, |
|
"step": 1614000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2815, |
|
"step": 1615000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2759, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.283, |
|
"step": 1617000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2862, |
|
"step": 1618000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2804, |
|
"step": 1619000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2825, |
|
"step": 1620000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2738, |
|
"step": 1621000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.273, |
|
"step": 1622000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2796, |
|
"step": 1623000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2758, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2867, |
|
"step": 1625000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2816, |
|
"step": 1626000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2856, |
|
"step": 1627000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2847, |
|
"step": 1628000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2873, |
|
"step": 1629000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2795, |
|
"step": 1630000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2887, |
|
"step": 1631000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2748, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2802, |
|
"step": 1633000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2784, |
|
"step": 1634000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2739, |
|
"step": 1635000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2877, |
|
"step": 1636000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2779, |
|
"step": 1637000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2773, |
|
"step": 1638000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.28, |
|
"step": 1639000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2827, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2771, |
|
"step": 1641000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2877, |
|
"step": 1642000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2777, |
|
"step": 1643000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2723, |
|
"step": 1644000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2762, |
|
"step": 1645000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2845, |
|
"step": 1646000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2771, |
|
"step": 1647000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.278, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2737, |
|
"step": 1649000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2672, |
|
"step": 1650000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_accuracy": 0.594395614413767, |
|
"eval_loss": 2.1171875, |
|
"eval_runtime": 4580.2758, |
|
"eval_samples_per_second": 138.876, |
|
"eval_steps_per_second": 2.17, |
|
"step": 1650000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.268, |
|
"step": 1651000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2746, |
|
"step": 1652000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2792, |
|
"step": 1653000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.279, |
|
"step": 1654000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2716, |
|
"step": 1655000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2808, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2829, |
|
"step": 1657000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2804, |
|
"step": 1658000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2724, |
|
"step": 1659000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2821, |
|
"step": 1660000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2768, |
|
"step": 1661000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2779, |
|
"step": 1662000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2764, |
|
"step": 1663000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2829, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2754, |
|
"step": 1665000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2814, |
|
"step": 1666000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2717, |
|
"step": 1667000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2697, |
|
"step": 1668000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2825, |
|
"step": 1669000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2732, |
|
"step": 1670000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2749, |
|
"step": 1671000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2757, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2643, |
|
"step": 1673000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2784, |
|
"step": 1674000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.281, |
|
"step": 1675000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2727, |
|
"step": 1676000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.285, |
|
"step": 1677000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2738, |
|
"step": 1678000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2728, |
|
"step": 1679000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2696, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2707, |
|
"step": 1681000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2743, |
|
"step": 1682000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.264, |
|
"step": 1683000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2787, |
|
"step": 1684000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2674, |
|
"step": 1685000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2815, |
|
"step": 1686000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2668, |
|
"step": 1687000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2783, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2748, |
|
"step": 1689000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2711, |
|
"step": 1690000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2724, |
|
"step": 1691000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2709, |
|
"step": 1692000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2737, |
|
"step": 1693000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2872, |
|
"step": 1694000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2691, |
|
"step": 1695000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2724, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.268, |
|
"step": 1697000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2811, |
|
"step": 1698000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2744, |
|
"step": 1699000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2705, |
|
"step": 1700000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2658, |
|
"step": 1701000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2686, |
|
"step": 1702000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2756, |
|
"step": 1703000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2761, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2692, |
|
"step": 1705000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2723, |
|
"step": 1706000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2698, |
|
"step": 1707000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2638, |
|
"step": 1708000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2698, |
|
"step": 1709000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2736, |
|
"step": 1710000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2701, |
|
"step": 1711000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2622, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2625, |
|
"step": 1713000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2696, |
|
"step": 1714000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2676, |
|
"step": 1715000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2604, |
|
"step": 1716000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2723, |
|
"step": 1717000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2704, |
|
"step": 1718000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2788, |
|
"step": 1719000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2689, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2688, |
|
"step": 1721000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.273, |
|
"step": 1722000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2755, |
|
"step": 1723000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2752, |
|
"step": 1724000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2675, |
|
"step": 1725000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.27, |
|
"step": 1726000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2698, |
|
"step": 1727000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2689, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2633, |
|
"step": 1729000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2751, |
|
"step": 1730000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2645, |
|
"step": 1731000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.269, |
|
"step": 1732000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2623, |
|
"step": 1733000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2674, |
|
"step": 1734000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2608, |
|
"step": 1735000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2645, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2743, |
|
"step": 1737000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.268, |
|
"step": 1738000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2631, |
|
"step": 1739000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2689, |
|
"step": 1740000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.265, |
|
"step": 1741000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2653, |
|
"step": 1742000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2656, |
|
"step": 1743000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2641, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2687, |
|
"step": 1745000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2639, |
|
"step": 1746000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2619, |
|
"step": 1747000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2727, |
|
"step": 1748000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2638, |
|
"step": 1749000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2623, |
|
"step": 1750000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2589, |
|
"step": 1751000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2712, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2608, |
|
"step": 1753000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2619, |
|
"step": 1754000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2673, |
|
"step": 1755000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2657, |
|
"step": 1756000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2596, |
|
"step": 1757000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2616, |
|
"step": 1758000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2713, |
|
"step": 1759000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2699, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2702, |
|
"step": 1761000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2642, |
|
"step": 1762000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2668, |
|
"step": 1763000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2629, |
|
"step": 1764000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.261, |
|
"step": 1765000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2639, |
|
"step": 1766000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2629, |
|
"step": 1767000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.265, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2725, |
|
"step": 1769000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2676, |
|
"step": 1770000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2674, |
|
"step": 1771000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2598, |
|
"step": 1772000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2686, |
|
"step": 1773000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2653, |
|
"step": 1774000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2672, |
|
"step": 1775000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2697, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2682, |
|
"step": 1777000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2587, |
|
"step": 1778000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2604, |
|
"step": 1779000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2635, |
|
"step": 1780000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2573, |
|
"step": 1781000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2601, |
|
"step": 1782000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2717, |
|
"step": 1783000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.265, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2689, |
|
"step": 1785000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2669, |
|
"step": 1786000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2554, |
|
"step": 1787000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2647, |
|
"step": 1788000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.259, |
|
"step": 1789000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2628, |
|
"step": 1790000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2613, |
|
"step": 1791000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2629, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.26, |
|
"step": 1793000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2547, |
|
"step": 1794000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.256, |
|
"step": 1795000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2604, |
|
"step": 1796000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.269, |
|
"step": 1797000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2643, |
|
"step": 1798000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2643, |
|
"step": 1799000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2658, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.5967758944679928, |
|
"eval_loss": 2.103515625, |
|
"eval_runtime": 4566.2819, |
|
"eval_samples_per_second": 139.301, |
|
"eval_steps_per_second": 2.177, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2635, |
|
"step": 1801000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2641, |
|
"step": 1802000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2638, |
|
"step": 1803000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2513, |
|
"step": 1804000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.264, |
|
"step": 1805000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2626, |
|
"step": 1806000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2563, |
|
"step": 1807000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2593, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2606, |
|
"step": 1809000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2542, |
|
"step": 1810000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2531, |
|
"step": 1811000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2613, |
|
"step": 1812000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2553, |
|
"step": 1813000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2647, |
|
"step": 1814000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2572, |
|
"step": 1815000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2569, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2603, |
|
"step": 1817000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2624, |
|
"step": 1818000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2526, |
|
"step": 1819000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2613, |
|
"step": 1820000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2551, |
|
"step": 1821000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2556, |
|
"step": 1822000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2519, |
|
"step": 1823000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2496, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2647, |
|
"step": 1825000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2559, |
|
"step": 1826000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2538, |
|
"step": 1827000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2623, |
|
"step": 1828000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2558, |
|
"step": 1829000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2518, |
|
"step": 1830000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2601, |
|
"step": 1831000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2527, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2587, |
|
"step": 1833000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2674, |
|
"step": 1834000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2587, |
|
"step": 1835000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2532, |
|
"step": 1836000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2586, |
|
"step": 1837000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2579, |
|
"step": 1838000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2581, |
|
"step": 1839000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2645, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2569, |
|
"step": 1841000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2639, |
|
"step": 1842000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2627, |
|
"step": 1843000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2643, |
|
"step": 1844000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2574, |
|
"step": 1845000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.256, |
|
"step": 1846000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2574, |
|
"step": 1847000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2504, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2543, |
|
"step": 1849000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2613, |
|
"step": 1850000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2614, |
|
"step": 1851000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2564, |
|
"step": 1852000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2655, |
|
"step": 1853000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2476, |
|
"step": 1854000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2561, |
|
"step": 1855000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2623, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2472, |
|
"step": 1857000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.261, |
|
"step": 1858000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2546, |
|
"step": 1859000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2587, |
|
"step": 1860000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2576, |
|
"step": 1861000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2536, |
|
"step": 1862000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2527, |
|
"step": 1863000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2592, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.258, |
|
"step": 1865000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2542, |
|
"step": 1866000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2517, |
|
"step": 1867000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2614, |
|
"step": 1868000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2525, |
|
"step": 1869000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2488, |
|
"step": 1870000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2509, |
|
"step": 1871000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2642, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2598, |
|
"step": 1873000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2629, |
|
"step": 1874000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2537, |
|
"step": 1875000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2517, |
|
"step": 1876000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2579, |
|
"step": 1877000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2457, |
|
"step": 1878000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2506, |
|
"step": 1879000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2564, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2564, |
|
"step": 1881000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2564, |
|
"step": 1882000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2536, |
|
"step": 1883000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2429, |
|
"step": 1884000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2544, |
|
"step": 1885000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2491, |
|
"step": 1886000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2527, |
|
"step": 1887000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2486, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2482, |
|
"step": 1889000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2472, |
|
"step": 1890000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2465, |
|
"step": 1891000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2472, |
|
"step": 1892000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2524, |
|
"step": 1893000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2592, |
|
"step": 1894000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2507, |
|
"step": 1895000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2486, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2528, |
|
"step": 1897000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2456, |
|
"step": 1898000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2582, |
|
"step": 1899000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2483, |
|
"step": 1900000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2546, |
|
"step": 1901000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2491, |
|
"step": 1902000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2475, |
|
"step": 1903000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2487, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2428, |
|
"step": 1905000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2524, |
|
"step": 1906000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2587, |
|
"step": 1907000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.253, |
|
"step": 1908000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.255, |
|
"step": 1909000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2485, |
|
"step": 1910000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2547, |
|
"step": 1911000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.252, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2541, |
|
"step": 1913000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2504, |
|
"step": 1914000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2501, |
|
"step": 1915000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2462, |
|
"step": 1916000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2562, |
|
"step": 1917000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2438, |
|
"step": 1918000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2566, |
|
"step": 1919000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2463, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.253, |
|
"step": 1921000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2491, |
|
"step": 1922000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2505, |
|
"step": 1923000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2595, |
|
"step": 1924000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.242, |
|
"step": 1925000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2472, |
|
"step": 1926000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2541, |
|
"step": 1927000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2577, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2409, |
|
"step": 1929000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2457, |
|
"step": 1930000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2485, |
|
"step": 1931000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2388, |
|
"step": 1932000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.238, |
|
"step": 1933000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2509, |
|
"step": 1934000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.242, |
|
"step": 1935000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.253, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2549, |
|
"step": 1937000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2496, |
|
"step": 1938000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2539, |
|
"step": 1939000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2536, |
|
"step": 1940000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2491, |
|
"step": 1941000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2433, |
|
"step": 1942000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2435, |
|
"step": 1943000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2485, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2504, |
|
"step": 1945000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2404, |
|
"step": 1946000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2496, |
|
"step": 1947000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2498, |
|
"step": 1948000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5e-05, |
|
"loss": 2.245, |
|
"step": 1949000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2511, |
|
"step": 1950000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.5974179462628302, |
|
"eval_loss": 2.09375, |
|
"eval_runtime": 4575.295, |
|
"eval_samples_per_second": 139.027, |
|
"eval_steps_per_second": 2.172, |
|
"step": 1950000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2454, |
|
"step": 1951000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2447, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.246, |
|
"step": 1953000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2494, |
|
"step": 1954000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2503, |
|
"step": 1955000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2404, |
|
"step": 1956000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2426, |
|
"step": 1957000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2461, |
|
"step": 1958000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2424, |
|
"step": 1959000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2524, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2414, |
|
"step": 1961000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2425, |
|
"step": 1962000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2458, |
|
"step": 1963000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2477, |
|
"step": 1964000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1964054, |
|
"total_flos": 2.7512471571964887e+19, |
|
"train_loss": 0.10199262492233857, |
|
"train_runtime": 71458.7973, |
|
"train_samples_per_second": 3518.093, |
|
"train_steps_per_second": 27.485 |
|
} |
|
], |
|
"max_steps": 1964054, |
|
"num_train_epochs": 2, |
|
"total_flos": 2.7512471571964887e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|