|
{ |
|
"best_metric": 1.634265422821045, |
|
"best_model_checkpoint": "./results/checkpoint-5800", |
|
"epoch": 0.2142434988179669, |
|
"global_step": 5800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019996, |
|
"loss": 4.2732, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019992000000000002, |
|
"loss": 3.4267, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019988, |
|
"loss": 3.1308, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019984, |
|
"loss": 2.9584, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001998, |
|
"loss": 2.8321, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019976000000000003, |
|
"loss": 2.8353, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019972000000000002, |
|
"loss": 2.7275, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019968, |
|
"loss": 2.7678, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019964, |
|
"loss": 2.6752, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001996, |
|
"loss": 2.7207, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_accuracy": 0.6036520746718839, |
|
"eval_loss": 2.011291980743408, |
|
"eval_runtime": 48.3273, |
|
"eval_samples_per_second": 8.753, |
|
"eval_steps_per_second": 1.097, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019956000000000002, |
|
"loss": 2.6655, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019952000000000001, |
|
"loss": 2.5982, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019948, |
|
"loss": 2.5936, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019944, |
|
"loss": 2.625, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019940000000000002, |
|
"loss": 2.6117, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019936000000000002, |
|
"loss": 2.5167, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019932, |
|
"loss": 2.4674, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019928, |
|
"loss": 2.5246, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019924, |
|
"loss": 2.5045, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019920000000000002, |
|
"loss": 2.5222, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.5987609032363251, |
|
"eval_loss": 2.0005741119384766, |
|
"eval_runtime": 47.4839, |
|
"eval_samples_per_second": 8.908, |
|
"eval_steps_per_second": 1.116, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019916, |
|
"loss": 2.5048, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019912, |
|
"loss": 2.4859, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019908, |
|
"loss": 2.439, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019904, |
|
"loss": 2.489, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000199, |
|
"loss": 2.5181, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019896, |
|
"loss": 2.4656, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019892000000000003, |
|
"loss": 2.4849, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019888, |
|
"loss": 2.4934, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019884000000000001, |
|
"loss": 2.4359, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001988, |
|
"loss": 2.4123, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.6184886280264124, |
|
"eval_loss": 1.9406265020370483, |
|
"eval_runtime": 50.1664, |
|
"eval_samples_per_second": 8.432, |
|
"eval_steps_per_second": 1.056, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019876, |
|
"loss": 2.4585, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019872000000000002, |
|
"loss": 2.436, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019868, |
|
"loss": 2.4361, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019864, |
|
"loss": 2.4193, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001986, |
|
"loss": 2.4404, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019856000000000002, |
|
"loss": 2.4109, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019852000000000002, |
|
"loss": 2.4257, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019848, |
|
"loss": 2.423, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019844, |
|
"loss": 2.3701, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001984, |
|
"loss": 2.4469, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.6204450966006358, |
|
"eval_loss": 1.9681750535964966, |
|
"eval_runtime": 47.2918, |
|
"eval_samples_per_second": 8.944, |
|
"eval_steps_per_second": 1.121, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019836000000000002, |
|
"loss": 2.3953, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019832, |
|
"loss": 2.3848, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019828, |
|
"loss": 2.3802, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019824, |
|
"loss": 2.4174, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019820000000000002, |
|
"loss": 2.4125, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019816000000000001, |
|
"loss": 2.387, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019812, |
|
"loss": 2.3491, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019808, |
|
"loss": 2.3997, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019804, |
|
"loss": 2.3107, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019800000000000002, |
|
"loss": 2.3367, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.6377272356729436, |
|
"eval_loss": 1.8571785688400269, |
|
"eval_runtime": 47.2012, |
|
"eval_samples_per_second": 8.962, |
|
"eval_steps_per_second": 1.123, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019796, |
|
"loss": 2.3571, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019792000000000003, |
|
"loss": 2.325, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019788, |
|
"loss": 2.39, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019784, |
|
"loss": 2.3508, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001978, |
|
"loss": 2.3736, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019776, |
|
"loss": 2.2891, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019772000000000002, |
|
"loss": 2.3026, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019768, |
|
"loss": 2.2965, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019764, |
|
"loss": 2.3467, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001976, |
|
"loss": 2.2691, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.6315317518545692, |
|
"eval_loss": 1.8730525970458984, |
|
"eval_runtime": 46.6226, |
|
"eval_samples_per_second": 9.073, |
|
"eval_steps_per_second": 1.137, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019756, |
|
"loss": 2.3046, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019752000000000002, |
|
"loss": 2.2575, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019748, |
|
"loss": 2.2895, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019744, |
|
"loss": 2.2699, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001974, |
|
"loss": 2.2235, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019736000000000002, |
|
"loss": 2.2322, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019732000000000001, |
|
"loss": 2.2743, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019728, |
|
"loss": 2.2625, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019724, |
|
"loss": 2.2843, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001972, |
|
"loss": 2.2715, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.632428466617755, |
|
"eval_loss": 1.8891419172286987, |
|
"eval_runtime": 47.1404, |
|
"eval_samples_per_second": 8.973, |
|
"eval_steps_per_second": 1.124, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019716000000000002, |
|
"loss": 2.2755, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019712, |
|
"loss": 2.2597, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019708000000000003, |
|
"loss": 2.2185, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019704, |
|
"loss": 2.3046, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019700000000000002, |
|
"loss": 2.3451, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019696, |
|
"loss": 2.2956, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019692, |
|
"loss": 2.2371, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019688000000000003, |
|
"loss": 2.2705, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019684, |
|
"loss": 2.2141, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001968, |
|
"loss": 2.2344, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.6379717942447216, |
|
"eval_loss": 1.8463128805160522, |
|
"eval_runtime": 47.4067, |
|
"eval_samples_per_second": 8.923, |
|
"eval_steps_per_second": 1.118, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019676, |
|
"loss": 2.221, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019672000000000003, |
|
"loss": 2.3338, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019668000000000002, |
|
"loss": 2.1978, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019664000000000001, |
|
"loss": 2.2532, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001966, |
|
"loss": 2.2765, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019656, |
|
"loss": 2.2643, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019652000000000002, |
|
"loss": 2.2794, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019648000000000002, |
|
"loss": 2.2269, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019644, |
|
"loss": 2.2576, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001964, |
|
"loss": 2.2234, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.632183908045977, |
|
"eval_loss": 1.866058588027954, |
|
"eval_runtime": 46.4721, |
|
"eval_samples_per_second": 9.102, |
|
"eval_steps_per_second": 1.14, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019636000000000002, |
|
"loss": 2.2282, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019632000000000002, |
|
"loss": 2.2236, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019628, |
|
"loss": 2.2008, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019624, |
|
"loss": 2.2731, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001962, |
|
"loss": 2.2424, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019616000000000002, |
|
"loss": 2.1774, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019612, |
|
"loss": 2.1652, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019608, |
|
"loss": 2.1703, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019604, |
|
"loss": 2.185, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000196, |
|
"loss": 2.1818, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.6382163528164996, |
|
"eval_loss": 1.81806218624115, |
|
"eval_runtime": 60.7049, |
|
"eval_samples_per_second": 6.968, |
|
"eval_steps_per_second": 0.873, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019596000000000001, |
|
"loss": 2.1961, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019592, |
|
"loss": 2.1806, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019588000000000003, |
|
"loss": 2.1523, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019584, |
|
"loss": 2.1305, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019580000000000002, |
|
"loss": 2.1467, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019576, |
|
"loss": 2.1324, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019572, |
|
"loss": 2.211, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019568000000000002, |
|
"loss": 2.1626, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019564, |
|
"loss": 2.1327, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001956, |
|
"loss": 2.1808, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.6419662509170947, |
|
"eval_loss": 1.838297724723816, |
|
"eval_runtime": 61.0821, |
|
"eval_samples_per_second": 6.925, |
|
"eval_steps_per_second": 0.868, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019556, |
|
"loss": 2.2277, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019552000000000003, |
|
"loss": 2.1227, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019548000000000002, |
|
"loss": 2.2336, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019544, |
|
"loss": 2.1708, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001954, |
|
"loss": 2.1549, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019536, |
|
"loss": 2.1776, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019532000000000002, |
|
"loss": 2.1634, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019528000000000001, |
|
"loss": 2.1891, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019524, |
|
"loss": 2.2221, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001952, |
|
"loss": 2.1885, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.6440857585391702, |
|
"eval_loss": 1.8327720165252686, |
|
"eval_runtime": 61.1648, |
|
"eval_samples_per_second": 6.916, |
|
"eval_steps_per_second": 0.867, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019516000000000002, |
|
"loss": 2.1854, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019512000000000002, |
|
"loss": 2.1554, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019508, |
|
"loss": 2.1365, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019504, |
|
"loss": 2.1783, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000195, |
|
"loss": 2.1918, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019496000000000002, |
|
"loss": 2.2186, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019492, |
|
"loss": 2.1508, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019488000000000003, |
|
"loss": 2.1997, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019484, |
|
"loss": 2.0978, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001948, |
|
"loss": 2.1547, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.648650851879025, |
|
"eval_loss": 1.7967556715011597, |
|
"eval_runtime": 60.331, |
|
"eval_samples_per_second": 7.011, |
|
"eval_steps_per_second": 0.878, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019476, |
|
"loss": 2.1202, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019472, |
|
"loss": 2.1406, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019468000000000003, |
|
"loss": 2.149, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019464, |
|
"loss": 2.1915, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019460000000000001, |
|
"loss": 2.1721, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019456, |
|
"loss": 2.1155, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019452, |
|
"loss": 2.138, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019448000000000002, |
|
"loss": 2.0946, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019444, |
|
"loss": 2.1564, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001944, |
|
"loss": 2.1261, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.6471835004483574, |
|
"eval_loss": 1.8035624027252197, |
|
"eval_runtime": 59.6014, |
|
"eval_samples_per_second": 7.097, |
|
"eval_steps_per_second": 0.889, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019436, |
|
"loss": 2.1554, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019432000000000002, |
|
"loss": 2.1857, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019428000000000002, |
|
"loss": 2.1381, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019424, |
|
"loss": 2.1638, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001942, |
|
"loss": 2.1491, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019416, |
|
"loss": 2.209, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019412000000000002, |
|
"loss": 2.2035, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019408, |
|
"loss": 2.079, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019404, |
|
"loss": 2.1535, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000194, |
|
"loss": 2.1074, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.6522377109317682, |
|
"eval_loss": 1.7710509300231934, |
|
"eval_runtime": 45.8627, |
|
"eval_samples_per_second": 9.223, |
|
"eval_steps_per_second": 1.156, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019396000000000002, |
|
"loss": 2.0692, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019392000000000001, |
|
"loss": 2.106, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019388, |
|
"loss": 2.133, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019384, |
|
"loss": 2.0844, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001938, |
|
"loss": 2.0839, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019376000000000002, |
|
"loss": 2.1023, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019372, |
|
"loss": 2.124, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019368000000000003, |
|
"loss": 2.1142, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019364, |
|
"loss": 2.0142, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019360000000000002, |
|
"loss": 2.1537, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.6504442814053966, |
|
"eval_loss": 1.7606570720672607, |
|
"eval_runtime": 46.4971, |
|
"eval_samples_per_second": 9.097, |
|
"eval_steps_per_second": 1.14, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019356, |
|
"loss": 2.0624, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019352, |
|
"loss": 2.1308, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019348000000000002, |
|
"loss": 2.0587, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019344, |
|
"loss": 2.1031, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001934, |
|
"loss": 2.0765, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019336, |
|
"loss": 2.0972, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019332, |
|
"loss": 2.0875, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019328000000000002, |
|
"loss": 2.1277, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019323999999999999, |
|
"loss": 2.088, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001932, |
|
"loss": 2.1085, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.6466943833048014, |
|
"eval_loss": 1.7533295154571533, |
|
"eval_runtime": 45.9496, |
|
"eval_samples_per_second": 9.206, |
|
"eval_steps_per_second": 1.153, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019316, |
|
"loss": 2.159, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019312000000000002, |
|
"loss": 2.0808, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019308000000000001, |
|
"loss": 2.1481, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019304, |
|
"loss": 2.0853, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000193, |
|
"loss": 2.0692, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019296, |
|
"loss": 2.2094, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019292000000000002, |
|
"loss": 2.0793, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019288, |
|
"loss": 2.0469, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019284, |
|
"loss": 2.1105, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001928, |
|
"loss": 2.1268, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.6550093747452514, |
|
"eval_loss": 1.7450594902038574, |
|
"eval_runtime": 46.1382, |
|
"eval_samples_per_second": 9.168, |
|
"eval_steps_per_second": 1.149, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019276000000000002, |
|
"loss": 2.0895, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019272, |
|
"loss": 2.1127, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019268, |
|
"loss": 2.0435, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019264, |
|
"loss": 2.0799, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001926, |
|
"loss": 2.0886, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019256, |
|
"loss": 2.0726, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019252, |
|
"loss": 2.1095, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019248000000000003, |
|
"loss": 2.0732, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019244000000000002, |
|
"loss": 2.0577, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019240000000000001, |
|
"loss": 2.0991, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.6504442814053966, |
|
"eval_loss": 1.7744449377059937, |
|
"eval_runtime": 46.1319, |
|
"eval_samples_per_second": 9.169, |
|
"eval_steps_per_second": 1.149, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019236, |
|
"loss": 2.113, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019232, |
|
"loss": 2.0054, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019228000000000002, |
|
"loss": 2.0531, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019224000000000002, |
|
"loss": 2.0268, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001922, |
|
"loss": 2.0703, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019216, |
|
"loss": 2.0101, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019212000000000002, |
|
"loss": 2.0598, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019208000000000002, |
|
"loss": 2.0731, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019204, |
|
"loss": 2.0695, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000192, |
|
"loss": 2.0982, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.6515855547403603, |
|
"eval_loss": 1.738739252090454, |
|
"eval_runtime": 45.7921, |
|
"eval_samples_per_second": 9.237, |
|
"eval_steps_per_second": 1.157, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019196, |
|
"loss": 2.0565, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019192000000000002, |
|
"loss": 2.0358, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019188, |
|
"loss": 2.0407, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019184, |
|
"loss": 2.0584, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001918, |
|
"loss": 2.0313, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019176, |
|
"loss": 2.0852, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019172000000000001, |
|
"loss": 2.1305, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019168, |
|
"loss": 2.8885, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019164000000000003, |
|
"loss": 5.2587, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001916, |
|
"loss": 5.5295, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.31556207711746964, |
|
"eval_loss": 4.5144548416137695, |
|
"eval_runtime": 46.4657, |
|
"eval_samples_per_second": 9.103, |
|
"eval_steps_per_second": 1.141, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019156000000000002, |
|
"loss": 4.2499, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019152, |
|
"loss": 2.2078, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019148, |
|
"loss": 2.1916, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019144000000000002, |
|
"loss": 2.1247, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001914, |
|
"loss": 2.0517, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019136, |
|
"loss": 2.1537, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019132, |
|
"loss": 2.1611, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019128000000000003, |
|
"loss": 2.1015, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019124000000000002, |
|
"loss": 2.191, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001912, |
|
"loss": 2.0354, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.652645308551398, |
|
"eval_loss": 1.74555242061615, |
|
"eval_runtime": 45.8199, |
|
"eval_samples_per_second": 9.232, |
|
"eval_steps_per_second": 1.157, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019116, |
|
"loss": 2.0853, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019112, |
|
"loss": 2.0091, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019108000000000002, |
|
"loss": 2.093, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019104000000000001, |
|
"loss": 1.9993, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000191, |
|
"loss": 2.0933, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019096, |
|
"loss": 2.0847, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019092000000000002, |
|
"loss": 2.1092, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019088000000000002, |
|
"loss": 2.1634, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019084, |
|
"loss": 2.048, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001908, |
|
"loss": 2.0382, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.6545202576016956, |
|
"eval_loss": 1.7376079559326172, |
|
"eval_runtime": 46.305, |
|
"eval_samples_per_second": 9.135, |
|
"eval_steps_per_second": 1.145, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019076, |
|
"loss": 2.0105, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019072000000000002, |
|
"loss": 1.9905, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019068, |
|
"loss": 2.1397, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019064000000000003, |
|
"loss": 1.9861, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001906, |
|
"loss": 2.0603, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019056000000000002, |
|
"loss": 2.0137, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019052, |
|
"loss": 2.0223, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019048, |
|
"loss": 2.0917, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019044000000000003, |
|
"loss": 2.0426, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001904, |
|
"loss": 2.0427, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.6534605037906579, |
|
"eval_loss": 1.7286646366119385, |
|
"eval_runtime": 46.0656, |
|
"eval_samples_per_second": 9.183, |
|
"eval_steps_per_second": 1.151, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019036000000000001, |
|
"loss": 1.9983, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019032, |
|
"loss": 2.0404, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019028, |
|
"loss": 2.0165, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019024000000000002, |
|
"loss": 2.0585, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001902, |
|
"loss": 2.0517, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019016, |
|
"loss": 2.041, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019012, |
|
"loss": 1.9899, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019008000000000002, |
|
"loss": 1.9965, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019004000000000002, |
|
"loss": 2.0312, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019, |
|
"loss": 1.9543, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.6536235428385099, |
|
"eval_loss": 1.7141377925872803, |
|
"eval_runtime": 46.1754, |
|
"eval_samples_per_second": 9.161, |
|
"eval_steps_per_second": 1.148, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018996, |
|
"loss": 2.0157, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018992, |
|
"loss": 2.0398, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018988000000000002, |
|
"loss": 2.0151, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018984, |
|
"loss": 2.0104, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001898, |
|
"loss": 2.1258, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018976, |
|
"loss": 2.0673, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018972000000000002, |
|
"loss": 1.9949, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018968, |
|
"loss": 2.0162, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018964, |
|
"loss": 2.065, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001896, |
|
"loss": 1.9798, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.6568843237955491, |
|
"eval_loss": 1.7275162935256958, |
|
"eval_runtime": 46.0053, |
|
"eval_samples_per_second": 9.195, |
|
"eval_steps_per_second": 1.152, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018956, |
|
"loss": 2.0334, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018952000000000002, |
|
"loss": 2.0242, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018948, |
|
"loss": 2.0321, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018944000000000003, |
|
"loss": 2.0455, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001894, |
|
"loss": 2.0152, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018936000000000002, |
|
"loss": 2.0764, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018932, |
|
"loss": 2.0572, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018928, |
|
"loss": 1.9985, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018924000000000002, |
|
"loss": 2.0185, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001892, |
|
"loss": 2.0054, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.6598190266568843, |
|
"eval_loss": 1.6908519268035889, |
|
"eval_runtime": 45.7104, |
|
"eval_samples_per_second": 9.254, |
|
"eval_steps_per_second": 1.159, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018916, |
|
"loss": 2.0133, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018912, |
|
"loss": 2.0096, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018908000000000003, |
|
"loss": 2.0126, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018904000000000002, |
|
"loss": 1.9772, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018899999999999999, |
|
"loss": 2.0467, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018896, |
|
"loss": 1.9178, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018892, |
|
"loss": 2.0297, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018888000000000002, |
|
"loss": 2.0253, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018884000000000001, |
|
"loss": 1.9633, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001888, |
|
"loss": 2.0391, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.657128882367327, |
|
"eval_loss": 1.7396591901779175, |
|
"eval_runtime": 46.5669, |
|
"eval_samples_per_second": 9.084, |
|
"eval_steps_per_second": 1.138, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018876, |
|
"loss": 2.0096, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018872, |
|
"loss": 1.9861, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018868000000000002, |
|
"loss": 1.9337, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018864, |
|
"loss": 2.0316, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001886, |
|
"loss": 2.0544, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018856, |
|
"loss": 1.9592, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018852000000000002, |
|
"loss": 1.9677, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018848, |
|
"loss": 2.0595, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018844, |
|
"loss": 1.9924, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001884, |
|
"loss": 1.988, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.6597375071329583, |
|
"eval_loss": 1.7008874416351318, |
|
"eval_runtime": 46.6514, |
|
"eval_samples_per_second": 9.067, |
|
"eval_steps_per_second": 1.136, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018836, |
|
"loss": 2.0202, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018832, |
|
"loss": 1.9864, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018828, |
|
"loss": 1.9831, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018824000000000003, |
|
"loss": 1.9967, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001882, |
|
"loss": 1.9784, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018816000000000001, |
|
"loss": 1.9662, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018812, |
|
"loss": 1.9302, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018808, |
|
"loss": 2.0279, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018804000000000002, |
|
"loss": 2.0217, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000188, |
|
"loss": 2.0044, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.6644656395206652, |
|
"eval_loss": 1.6991885900497437, |
|
"eval_runtime": 45.8483, |
|
"eval_samples_per_second": 9.226, |
|
"eval_steps_per_second": 1.156, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018796, |
|
"loss": 2.0481, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018792, |
|
"loss": 1.9341, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018788000000000002, |
|
"loss": 1.9922, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018784000000000002, |
|
"loss": 1.9681, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001878, |
|
"loss": 1.97, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018776, |
|
"loss": 1.9463, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018772, |
|
"loss": 1.9919, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018768000000000002, |
|
"loss": 1.9762, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018764, |
|
"loss": 1.9491, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001876, |
|
"loss": 2.0099, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.6629167685660716, |
|
"eval_loss": 1.6818691492080688, |
|
"eval_runtime": 47.0155, |
|
"eval_samples_per_second": 8.997, |
|
"eval_steps_per_second": 1.127, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018756, |
|
"loss": 1.9804, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018752, |
|
"loss": 1.9109, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018748000000000001, |
|
"loss": 1.9892, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018744, |
|
"loss": 1.9546, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018740000000000003, |
|
"loss": 1.9803, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018736, |
|
"loss": 1.9629, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018732000000000002, |
|
"loss": 1.9725, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018728, |
|
"loss": 2.0243, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018724, |
|
"loss": 2.0426, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018720000000000002, |
|
"loss": 1.9622, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.6634058857096274, |
|
"eval_loss": 1.6796367168426514, |
|
"eval_runtime": 45.6554, |
|
"eval_samples_per_second": 9.265, |
|
"eval_steps_per_second": 1.161, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018716, |
|
"loss": 2.0019, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018712, |
|
"loss": 1.9587, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018708, |
|
"loss": 1.9816, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018704000000000003, |
|
"loss": 1.9877, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018700000000000002, |
|
"loss": 2.0077, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018696, |
|
"loss": 2.043, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018692, |
|
"loss": 1.9861, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018688, |
|
"loss": 1.9399, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018684000000000002, |
|
"loss": 1.9473, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018680000000000001, |
|
"loss": 1.9716, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.657128882367327, |
|
"eval_loss": 1.7205617427825928, |
|
"eval_runtime": 46.0038, |
|
"eval_samples_per_second": 9.195, |
|
"eval_steps_per_second": 1.152, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018676, |
|
"loss": 1.9388, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018672, |
|
"loss": 1.9062, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018668000000000002, |
|
"loss": 2.0283, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018664000000000002, |
|
"loss": 1.9347, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001866, |
|
"loss": 1.9422, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018656, |
|
"loss": 1.935, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018652, |
|
"loss": 1.9642, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018648000000000002, |
|
"loss": 1.9179, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018644, |
|
"loss": 1.9948, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018640000000000003, |
|
"loss": 1.9136, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.6654438738077769, |
|
"eval_loss": 1.6819649934768677, |
|
"eval_runtime": 45.6304, |
|
"eval_samples_per_second": 9.27, |
|
"eval_steps_per_second": 1.162, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018636, |
|
"loss": 1.8923, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018632000000000002, |
|
"loss": 1.9587, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018628, |
|
"loss": 1.9211, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018624, |
|
"loss": 1.9302, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018620000000000003, |
|
"loss": 1.9764, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018616, |
|
"loss": 1.9956, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018612000000000001, |
|
"loss": 1.9196, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018608, |
|
"loss": 1.955, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018604, |
|
"loss": 1.9914, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018600000000000002, |
|
"loss": 1.9277, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.6650362761881471, |
|
"eval_loss": 1.6978471279144287, |
|
"eval_runtime": 45.6548, |
|
"eval_samples_per_second": 9.265, |
|
"eval_steps_per_second": 1.161, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018596, |
|
"loss": 1.949, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018592, |
|
"loss": 1.927, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018588, |
|
"loss": 2.0058, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018584000000000002, |
|
"loss": 1.9854, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018580000000000002, |
|
"loss": 1.9646, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018576, |
|
"loss": 1.9517, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018572, |
|
"loss": 2.0103, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018568, |
|
"loss": 1.9367, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018564000000000002, |
|
"loss": 1.9968, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001856, |
|
"loss": 1.9727, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.6593299095133285, |
|
"eval_loss": 1.6777493953704834, |
|
"eval_runtime": 60.7874, |
|
"eval_samples_per_second": 6.959, |
|
"eval_steps_per_second": 0.872, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018556, |
|
"loss": 1.9233, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018552, |
|
"loss": 1.926, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018548000000000002, |
|
"loss": 1.9538, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018544, |
|
"loss": 1.9795, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001854, |
|
"loss": 1.9675, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018536, |
|
"loss": 1.8831, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018532, |
|
"loss": 1.9082, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018528000000000001, |
|
"loss": 1.9514, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018524, |
|
"loss": 1.9714, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018520000000000003, |
|
"loss": 2.0391, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.6623461318985897, |
|
"eval_loss": 1.6935285329818726, |
|
"eval_runtime": 59.3856, |
|
"eval_samples_per_second": 7.123, |
|
"eval_steps_per_second": 0.892, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018516, |
|
"loss": 1.9065, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018512000000000002, |
|
"loss": 1.9062, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018508, |
|
"loss": 1.9187, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018504, |
|
"loss": 1.9407, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018500000000000002, |
|
"loss": 1.9684, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018496, |
|
"loss": 1.9124, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018492, |
|
"loss": 1.9626, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018488, |
|
"loss": 1.8665, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018484000000000003, |
|
"loss": 1.9432, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018480000000000002, |
|
"loss": 1.9367, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.6560691285562892, |
|
"eval_loss": 1.7309006452560425, |
|
"eval_runtime": 61.2631, |
|
"eval_samples_per_second": 6.905, |
|
"eval_steps_per_second": 0.865, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018476, |
|
"loss": 1.9483, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018472, |
|
"loss": 1.9067, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018468, |
|
"loss": 1.9485, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018464000000000002, |
|
"loss": 1.9529, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018460000000000001, |
|
"loss": 1.9463, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018456, |
|
"loss": 2.0001, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018452, |
|
"loss": 1.8994, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018448, |
|
"loss": 1.8648, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018444000000000002, |
|
"loss": 1.9028, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001844, |
|
"loss": 1.9146, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.6616939757071819, |
|
"eval_loss": 1.7054038047790527, |
|
"eval_runtime": 60.7176, |
|
"eval_samples_per_second": 6.967, |
|
"eval_steps_per_second": 0.873, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018436, |
|
"loss": 1.945, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018432, |
|
"loss": 1.9027, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018428000000000002, |
|
"loss": 1.959, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018424, |
|
"loss": 1.9469, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001842, |
|
"loss": 1.9149, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018416, |
|
"loss": 1.9151, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018412, |
|
"loss": 1.9054, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018408, |
|
"loss": 1.8939, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018404, |
|
"loss": 1.9331, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018400000000000003, |
|
"loss": 1.957, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.6670742642862966, |
|
"eval_loss": 1.6463383436203003, |
|
"eval_runtime": 46.0232, |
|
"eval_samples_per_second": 9.191, |
|
"eval_steps_per_second": 1.152, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018396, |
|
"loss": 1.9497, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018392000000000001, |
|
"loss": 1.9271, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018388, |
|
"loss": 1.9087, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018384, |
|
"loss": 1.9161, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018380000000000002, |
|
"loss": 1.9423, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018376, |
|
"loss": 1.8995, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018372, |
|
"loss": 1.9421, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018368, |
|
"loss": 1.9013, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018364000000000002, |
|
"loss": 1.9092, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018360000000000002, |
|
"loss": 1.882, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.6641395614249613, |
|
"eval_loss": 1.6783875226974487, |
|
"eval_runtime": 47.0079, |
|
"eval_samples_per_second": 8.998, |
|
"eval_steps_per_second": 1.127, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018356, |
|
"loss": 1.9424, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018352, |
|
"loss": 1.9477, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018348, |
|
"loss": 1.9255, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018344000000000002, |
|
"loss": 1.9456, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001834, |
|
"loss": 1.9288, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018336, |
|
"loss": 1.9088, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018332, |
|
"loss": 1.959, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018328000000000002, |
|
"loss": 1.9619, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018324000000000001, |
|
"loss": 1.9511, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001832, |
|
"loss": 1.9489, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.6603081438004402, |
|
"eval_loss": 1.676958680152893, |
|
"eval_runtime": 46.3451, |
|
"eval_samples_per_second": 9.127, |
|
"eval_steps_per_second": 1.144, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018316, |
|
"loss": 1.8862, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018312, |
|
"loss": 1.8631, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018308000000000002, |
|
"loss": 1.9649, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018304, |
|
"loss": 1.9592, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000183, |
|
"loss": 1.9317, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018296, |
|
"loss": 1.9138, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018292, |
|
"loss": 1.8876, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018288, |
|
"loss": 1.859, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018284, |
|
"loss": 1.9496, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018280000000000003, |
|
"loss": 1.9407, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.669601369528002, |
|
"eval_loss": 1.641465187072754, |
|
"eval_runtime": 46.8321, |
|
"eval_samples_per_second": 9.032, |
|
"eval_steps_per_second": 1.132, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018276, |
|
"loss": 1.9654, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018272, |
|
"loss": 1.9223, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018268, |
|
"loss": 1.9163, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018264, |
|
"loss": 1.9175, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018260000000000002, |
|
"loss": 1.9255, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018256, |
|
"loss": 1.8736, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018252, |
|
"loss": 1.8851, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018248, |
|
"loss": 1.9125, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018244000000000002, |
|
"loss": 1.888, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018240000000000002, |
|
"loss": 1.9402, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.6654438738077769, |
|
"eval_loss": 1.681833267211914, |
|
"eval_runtime": 46.6168, |
|
"eval_samples_per_second": 9.074, |
|
"eval_steps_per_second": 1.137, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018236, |
|
"loss": 1.8771, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018232, |
|
"loss": 1.9207, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018228, |
|
"loss": 1.8753, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018224000000000002, |
|
"loss": 1.8738, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001822, |
|
"loss": 1.9238, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018216000000000003, |
|
"loss": 1.8845, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018212, |
|
"loss": 1.9076, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018208000000000002, |
|
"loss": 1.9064, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018204, |
|
"loss": 1.9555, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000182, |
|
"loss": 1.894, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.6659329909513328, |
|
"eval_loss": 1.664554476737976, |
|
"eval_runtime": 46.5128, |
|
"eval_samples_per_second": 9.094, |
|
"eval_steps_per_second": 1.139, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018196000000000003, |
|
"loss": 1.9158, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018192, |
|
"loss": 1.9474, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018188000000000001, |
|
"loss": 1.9463, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018184, |
|
"loss": 1.8755, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018180000000000003, |
|
"loss": 1.9637, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018176000000000002, |
|
"loss": 1.9052, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018172, |
|
"loss": 1.9231, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018168, |
|
"loss": 1.9216, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018164, |
|
"loss": 1.8753, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018160000000000002, |
|
"loss": 1.9149, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.6674818619059265, |
|
"eval_loss": 1.6451318264007568, |
|
"eval_runtime": 46.9277, |
|
"eval_samples_per_second": 9.014, |
|
"eval_steps_per_second": 1.129, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018156000000000002, |
|
"loss": 1.8871, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018152, |
|
"loss": 1.9246, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018148, |
|
"loss": 1.9086, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018144, |
|
"loss": 1.9105, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018140000000000002, |
|
"loss": 1.9686, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018136, |
|
"loss": 1.8944, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018132, |
|
"loss": 1.8953, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018128, |
|
"loss": 1.898, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018124000000000002, |
|
"loss": 1.934, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001812, |
|
"loss": 1.8868, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.6653623542838509, |
|
"eval_loss": 1.6440030336380005, |
|
"eval_runtime": 47.0909, |
|
"eval_samples_per_second": 8.983, |
|
"eval_steps_per_second": 1.125, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018116, |
|
"loss": 1.8573, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018112, |
|
"loss": 1.8907, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018108, |
|
"loss": 1.8429, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018104000000000001, |
|
"loss": 1.8393, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000181, |
|
"loss": 2.0048, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018096000000000003, |
|
"loss": 1.8358, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018092, |
|
"loss": 1.9148, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018088000000000002, |
|
"loss": 1.8835, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018084, |
|
"loss": 1.8832, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001808, |
|
"loss": 1.9157, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.6643841199967392, |
|
"eval_loss": 1.6713789701461792, |
|
"eval_runtime": 47.3501, |
|
"eval_samples_per_second": 8.933, |
|
"eval_steps_per_second": 1.119, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018076000000000002, |
|
"loss": 1.9131, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018072, |
|
"loss": 1.8841, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018068, |
|
"loss": 1.9479, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018064, |
|
"loss": 1.9109, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018060000000000003, |
|
"loss": 1.9122, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018056000000000002, |
|
"loss": 1.867, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018052, |
|
"loss": 1.9297, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018048, |
|
"loss": 1.8987, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018044, |
|
"loss": 1.8791, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018040000000000002, |
|
"loss": 1.884, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.669764408575854, |
|
"eval_loss": 1.6372376680374146, |
|
"eval_runtime": 45.8594, |
|
"eval_samples_per_second": 9.224, |
|
"eval_steps_per_second": 1.156, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018036000000000001, |
|
"loss": 1.9257, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018032, |
|
"loss": 1.8184, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018028, |
|
"loss": 1.9063, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018024, |
|
"loss": 1.9058, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018020000000000002, |
|
"loss": 1.8876, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018016, |
|
"loss": 1.8873, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018012, |
|
"loss": 1.8623, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018008, |
|
"loss": 1.8649, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018004000000000002, |
|
"loss": 1.9113, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018, |
|
"loss": 1.861, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.6632428466617755, |
|
"eval_loss": 1.6630749702453613, |
|
"eval_runtime": 46.3295, |
|
"eval_samples_per_second": 9.13, |
|
"eval_steps_per_second": 1.144, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017996, |
|
"loss": 1.95, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017992, |
|
"loss": 1.8517, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017988, |
|
"loss": 1.8582, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017984, |
|
"loss": 1.8658, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001798, |
|
"loss": 1.9111, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017976000000000003, |
|
"loss": 1.8874, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017972, |
|
"loss": 1.8853, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017968000000000001, |
|
"loss": 1.8797, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017964, |
|
"loss": 1.8727, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001796, |
|
"loss": 1.9064, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.6621015733268117, |
|
"eval_loss": 1.689902424812317, |
|
"eval_runtime": 47.432, |
|
"eval_samples_per_second": 8.918, |
|
"eval_steps_per_second": 1.117, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017956000000000002, |
|
"loss": 1.8958, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017952, |
|
"loss": 1.8912, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017948, |
|
"loss": 1.8342, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017944, |
|
"loss": 1.864, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017940000000000002, |
|
"loss": 1.8842, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017936000000000002, |
|
"loss": 1.8981, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017932, |
|
"loss": 1.8746, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017928, |
|
"loss": 1.8151, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017924, |
|
"loss": 1.8965, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017920000000000002, |
|
"loss": 1.9178, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.669682889051928, |
|
"eval_loss": 1.654188632965088, |
|
"eval_runtime": 47.2488, |
|
"eval_samples_per_second": 8.953, |
|
"eval_steps_per_second": 1.122, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017916, |
|
"loss": 1.8615, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017912, |
|
"loss": 1.8374, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017908, |
|
"loss": 1.9621, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017904000000000002, |
|
"loss": 1.8499, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017900000000000001, |
|
"loss": 1.8441, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017896, |
|
"loss": 1.9126, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00017892, |
|
"loss": 1.874, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017888, |
|
"loss": 1.9262, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017884000000000002, |
|
"loss": 1.8436, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001788, |
|
"loss": 1.9086, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.6702535257194098, |
|
"eval_loss": 1.6423653364181519, |
|
"eval_runtime": 47.3319, |
|
"eval_samples_per_second": 8.937, |
|
"eval_steps_per_second": 1.12, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017876, |
|
"loss": 1.9283, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017872, |
|
"loss": 1.8879, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017868, |
|
"loss": 1.9249, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017864, |
|
"loss": 1.866, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001786, |
|
"loss": 1.8631, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017856000000000003, |
|
"loss": 1.8883, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017852, |
|
"loss": 1.9085, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017848, |
|
"loss": 1.9506, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017844, |
|
"loss": 1.894, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001784, |
|
"loss": 1.9128, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.6704165647672617, |
|
"eval_loss": 1.65791916847229, |
|
"eval_runtime": 46.7984, |
|
"eval_samples_per_second": 9.039, |
|
"eval_steps_per_second": 1.133, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017836000000000002, |
|
"loss": 1.8842, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017832, |
|
"loss": 1.8935, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017828, |
|
"loss": 1.84, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017824, |
|
"loss": 1.8379, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017820000000000002, |
|
"loss": 1.8661, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017816000000000002, |
|
"loss": 1.8418, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017812, |
|
"loss": 1.8773, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017808, |
|
"loss": 1.939, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017804, |
|
"loss": 1.911, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017800000000000002, |
|
"loss": 1.8751, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.6681340180973343, |
|
"eval_loss": 1.6451387405395508, |
|
"eval_runtime": 46.943, |
|
"eval_samples_per_second": 9.011, |
|
"eval_steps_per_second": 1.129, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017796, |
|
"loss": 1.8385, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017792, |
|
"loss": 1.9321, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017788, |
|
"loss": 1.8533, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017784000000000002, |
|
"loss": 1.8428, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001778, |
|
"loss": 1.8833, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017776, |
|
"loss": 1.8387, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017772, |
|
"loss": 1.8742, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017768, |
|
"loss": 1.8835, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017764000000000001, |
|
"loss": 1.9084, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001776, |
|
"loss": 1.8289, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.6712317600065215, |
|
"eval_loss": 1.649842619895935, |
|
"eval_runtime": 47.4408, |
|
"eval_samples_per_second": 8.916, |
|
"eval_steps_per_second": 1.117, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017756000000000003, |
|
"loss": 1.9069, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017752, |
|
"loss": 1.8689, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017748000000000002, |
|
"loss": 1.8327, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017744, |
|
"loss": 1.8061, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001774, |
|
"loss": 1.8382, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017736000000000002, |
|
"loss": 1.8691, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017732000000000002, |
|
"loss": 1.8348, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017728, |
|
"loss": 1.8645, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017724, |
|
"loss": 1.9094, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001772, |
|
"loss": 1.8597, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.669601369528002, |
|
"eval_loss": 1.6287590265274048, |
|
"eval_runtime": 46.725, |
|
"eval_samples_per_second": 9.053, |
|
"eval_steps_per_second": 1.134, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017716000000000002, |
|
"loss": 1.8063, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017712, |
|
"loss": 1.8771, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017708, |
|
"loss": 1.9203, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017704, |
|
"loss": 1.9039, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017700000000000002, |
|
"loss": 1.8713, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017696, |
|
"loss": 1.8446, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017692, |
|
"loss": 1.8771, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017688, |
|
"loss": 1.8959, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017684, |
|
"loss": 1.915, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017680000000000001, |
|
"loss": 1.8873, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.6718023966740034, |
|
"eval_loss": 1.634265422821045, |
|
"eval_runtime": 47.1163, |
|
"eval_samples_per_second": 8.978, |
|
"eval_steps_per_second": 1.125, |
|
"step": 5800 |
|
} |
|
], |
|
"max_steps": 50000, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.066306262532096e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|