|
{ |
|
"best_metric": 0.9007891770011274, |
|
"best_model_checkpoint": "swin-base-patch4-window7-224-in22k-finetuned-lora-ISIC-2019/checkpoint-6000", |
|
"epoch": 99.2, |
|
"eval_steps": 500, |
|
"global_step": 6200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009983870967741936, |
|
"loss": 1.5414, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009967741935483871, |
|
"loss": 1.079, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0009951612903225807, |
|
"loss": 0.9569, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0009935483870967743, |
|
"loss": 0.9056, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0009919354838709678, |
|
"loss": 0.8682, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0009903225806451614, |
|
"loss": 0.858, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.733934611048478, |
|
"eval_loss": 0.7348718047142029, |
|
"eval_runtime": 45.7088, |
|
"eval_samples_per_second": 38.811, |
|
"eval_steps_per_second": 0.613, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.000988709677419355, |
|
"loss": 0.8064, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0009870967741935483, |
|
"loss": 0.7615, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.000985483870967742, |
|
"loss": 0.7629, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0009838709677419356, |
|
"loss": 0.7456, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0009822580645161292, |
|
"loss": 0.7168, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0009806451612903225, |
|
"loss": 0.7403, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7762119503945885, |
|
"eval_loss": 0.6363512873649597, |
|
"eval_runtime": 46.1661, |
|
"eval_samples_per_second": 38.426, |
|
"eval_steps_per_second": 0.607, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.000979032258064516, |
|
"loss": 0.7194, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0009774193548387096, |
|
"loss": 0.6984, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0009758064516129033, |
|
"loss": 0.6867, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0009741935483870968, |
|
"loss": 0.6767, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0009725806451612903, |
|
"loss": 0.6604, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.0009709677419354839, |
|
"loss": 0.675, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.7998872604284104, |
|
"eval_loss": 0.5776907801628113, |
|
"eval_runtime": 36.9833, |
|
"eval_samples_per_second": 47.968, |
|
"eval_steps_per_second": 0.757, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0009693548387096774, |
|
"loss": 0.6548, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.000967741935483871, |
|
"loss": 0.6551, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0009661290322580646, |
|
"loss": 0.6212, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.0009645161290322581, |
|
"loss": 0.6351, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.0009629032258064516, |
|
"loss": 0.6244, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0009612903225806452, |
|
"loss": 0.6073, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0009596774193548388, |
|
"loss": 0.6309, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7874859075535513, |
|
"eval_loss": 0.570148229598999, |
|
"eval_runtime": 29.3907, |
|
"eval_samples_per_second": 60.359, |
|
"eval_steps_per_second": 0.953, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0009580645161290322, |
|
"loss": 0.5976, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.0009564516129032258, |
|
"loss": 0.5955, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0009548387096774193, |
|
"loss": 0.5959, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.000953225806451613, |
|
"loss": 0.5927, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0009516129032258065, |
|
"loss": 0.5776, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00095, |
|
"loss": 0.5734, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.8015783540022547, |
|
"eval_loss": 0.5293604135513306, |
|
"eval_runtime": 29.4268, |
|
"eval_samples_per_second": 60.285, |
|
"eval_steps_per_second": 0.952, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.0009483870967741936, |
|
"loss": 0.5632, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.0009467741935483871, |
|
"loss": 0.5503, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0009451612903225807, |
|
"loss": 0.5658, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.0009435483870967742, |
|
"loss": 0.545, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0009419354838709677, |
|
"loss": 0.5667, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.0009403225806451613, |
|
"loss": 0.5338, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8010146561443067, |
|
"eval_loss": 0.5417840480804443, |
|
"eval_runtime": 29.4769, |
|
"eval_samples_per_second": 60.183, |
|
"eval_steps_per_second": 0.95, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.0009387096774193549, |
|
"loss": 0.5827, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.0009370967741935485, |
|
"loss": 0.5337, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0009354838709677419, |
|
"loss": 0.5306, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.0009338709677419355, |
|
"loss": 0.5738, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.000932258064516129, |
|
"loss": 0.5418, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.0009308064516129033, |
|
"loss": 0.5104, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.8179255918827508, |
|
"eval_loss": 0.5056995749473572, |
|
"eval_runtime": 29.8376, |
|
"eval_samples_per_second": 59.455, |
|
"eval_steps_per_second": 0.938, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.0009291935483870968, |
|
"loss": 0.5137, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0009275806451612904, |
|
"loss": 0.5276, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.000925967741935484, |
|
"loss": 0.5192, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.0009243548387096774, |
|
"loss": 0.5012, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.000922741935483871, |
|
"loss": 0.519, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.0009211290322580645, |
|
"loss": 0.5067, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0009195161290322581, |
|
"loss": 0.5091, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8207440811724915, |
|
"eval_loss": 0.5009720325469971, |
|
"eval_runtime": 29.8933, |
|
"eval_samples_per_second": 59.344, |
|
"eval_steps_per_second": 0.937, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.0009179032258064516, |
|
"loss": 0.4937, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.0009162903225806452, |
|
"loss": 0.4894, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.0009146774193548387, |
|
"loss": 0.4887, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 0.0009130645161290323, |
|
"loss": 0.5252, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0009114516129032259, |
|
"loss": 0.5007, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.0009098387096774193, |
|
"loss": 0.4678, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.8246899661781285, |
|
"eval_loss": 0.4757310152053833, |
|
"eval_runtime": 29.4337, |
|
"eval_samples_per_second": 60.271, |
|
"eval_steps_per_second": 0.951, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 0.0009082258064516129, |
|
"loss": 0.4527, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 0.0009066129032258064, |
|
"loss": 0.4553, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 0.0009050000000000001, |
|
"loss": 0.4927, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0009033870967741937, |
|
"loss": 0.4415, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 0.0009017741935483871, |
|
"loss": 0.4628, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 0.0009001612903225807, |
|
"loss": 0.467, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8151071025930101, |
|
"eval_loss": 0.4579251706600189, |
|
"eval_runtime": 29.9049, |
|
"eval_samples_per_second": 59.321, |
|
"eval_steps_per_second": 0.936, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 0.0008985483870967742, |
|
"loss": 0.4398, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 0.0008969354838709678, |
|
"loss": 0.4604, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 0.0008953225806451612, |
|
"loss": 0.4375, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 0.0008937096774193548, |
|
"loss": 0.4526, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 0.0008920967741935483, |
|
"loss": 0.4249, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.000890483870967742, |
|
"loss": 0.4416, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.8314543404735062, |
|
"eval_loss": 0.4649556279182434, |
|
"eval_runtime": 29.2149, |
|
"eval_samples_per_second": 60.722, |
|
"eval_steps_per_second": 0.958, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 0.0008888709677419356, |
|
"loss": 0.4438, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.000887258064516129, |
|
"loss": 0.404, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 0.0008856451612903226, |
|
"loss": 0.4364, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 0.0008840322580645161, |
|
"loss": 0.4046, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 0.0008824193548387097, |
|
"loss": 0.4116, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 0.0008808064516129033, |
|
"loss": 0.4447, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0008791935483870967, |
|
"loss": 0.4277, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8404735062006764, |
|
"eval_loss": 0.440464586019516, |
|
"eval_runtime": 29.0063, |
|
"eval_samples_per_second": 61.159, |
|
"eval_steps_per_second": 0.965, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 0.0008775806451612904, |
|
"loss": 0.4078, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 0.0008759677419354839, |
|
"loss": 0.4216, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 0.0008743548387096775, |
|
"loss": 0.4011, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 0.000872741935483871, |
|
"loss": 0.4047, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0008711290322580645, |
|
"loss": 0.4297, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 0.000869516129032258, |
|
"loss": 0.4261, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.8387824126268321, |
|
"eval_loss": 0.44135671854019165, |
|
"eval_runtime": 29.101, |
|
"eval_samples_per_second": 60.96, |
|
"eval_steps_per_second": 0.962, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 0.0008679032258064516, |
|
"loss": 0.4045, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 0.0008662903225806452, |
|
"loss": 0.3814, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 0.0008646774193548387, |
|
"loss": 0.3865, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 0.0008630645161290323, |
|
"loss": 0.3665, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 0.0008614516129032258, |
|
"loss": 0.3792, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 0.0008598387096774194, |
|
"loss": 0.4016, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8286358511837655, |
|
"eval_loss": 0.43920814990997314, |
|
"eval_runtime": 28.9441, |
|
"eval_samples_per_second": 61.291, |
|
"eval_steps_per_second": 0.967, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 0.000858225806451613, |
|
"loss": 0.3718, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 0.0008566129032258064, |
|
"loss": 0.38, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 0.000855, |
|
"loss": 0.3489, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"learning_rate": 0.0008533870967741935, |
|
"loss": 0.3882, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 0.0008517741935483872, |
|
"loss": 0.3825, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 0.0008501612903225807, |
|
"loss": 0.3729, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.8280721533258174, |
|
"eval_loss": 0.4471096396446228, |
|
"eval_runtime": 28.7947, |
|
"eval_samples_per_second": 61.609, |
|
"eval_steps_per_second": 0.972, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 0.0008485483870967742, |
|
"loss": 0.4284, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 0.0008469354838709678, |
|
"loss": 0.395, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 0.0008453225806451613, |
|
"loss": 0.3531, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 0.0008437096774193549, |
|
"loss": 0.354, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 0.0008420967741935483, |
|
"loss": 0.3698, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 0.0008404838709677419, |
|
"loss": 0.3465, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0008388709677419355, |
|
"loss": 0.3813, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8432919954904171, |
|
"eval_loss": 0.41551458835601807, |
|
"eval_runtime": 29.7739, |
|
"eval_samples_per_second": 59.582, |
|
"eval_steps_per_second": 0.94, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.16, |
|
"learning_rate": 0.0008372580645161291, |
|
"loss": 0.3449, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 0.0008356451612903227, |
|
"loss": 0.3651, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 16.48, |
|
"learning_rate": 0.0008340322580645161, |
|
"loss": 0.3519, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 0.0008324193548387097, |
|
"loss": 0.3669, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 0.0008308064516129032, |
|
"loss": 0.3629, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.0008291935483870968, |
|
"loss": 0.3454, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.8365276211950394, |
|
"eval_loss": 0.4322459399700165, |
|
"eval_runtime": 29.8666, |
|
"eval_samples_per_second": 59.397, |
|
"eval_steps_per_second": 0.938, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"learning_rate": 0.0008275806451612903, |
|
"loss": 0.3199, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 0.0008259677419354839, |
|
"loss": 0.3138, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 17.44, |
|
"learning_rate": 0.0008243548387096775, |
|
"loss": 0.3293, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 0.000822741935483871, |
|
"loss": 0.3411, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 0.0008211290322580646, |
|
"loss": 0.3326, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 0.000819516129032258, |
|
"loss": 0.3639, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8359639233370914, |
|
"eval_loss": 0.4332345724105835, |
|
"eval_runtime": 29.3927, |
|
"eval_samples_per_second": 60.355, |
|
"eval_steps_per_second": 0.953, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"learning_rate": 0.0008179032258064516, |
|
"loss": 0.3441, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 0.0008162903225806451, |
|
"loss": 0.2963, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 0.0008146774193548387, |
|
"loss": 0.2963, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 0.0008130645161290324, |
|
"loss": 0.3363, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"learning_rate": 0.0008114516129032258, |
|
"loss": 0.3429, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 18.88, |
|
"learning_rate": 0.0008098387096774194, |
|
"loss": 0.3393, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.8523111612175873, |
|
"eval_loss": 0.4190393388271332, |
|
"eval_runtime": 29.5495, |
|
"eval_samples_per_second": 60.035, |
|
"eval_steps_per_second": 0.948, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 0.0008082258064516129, |
|
"loss": 0.3017, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.0008066129032258065, |
|
"loss": 0.3258, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 0.0008051612903225806, |
|
"loss": 0.3028, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 0.0008035483870967743, |
|
"loss": 0.3226, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 19.68, |
|
"learning_rate": 0.0008019354838709677, |
|
"loss": 0.3502, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"learning_rate": 0.0008003225806451613, |
|
"loss": 0.3132, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0007987096774193549, |
|
"loss": 0.3135, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8534385569334837, |
|
"eval_loss": 0.41664600372314453, |
|
"eval_runtime": 29.6803, |
|
"eval_samples_per_second": 59.77, |
|
"eval_steps_per_second": 0.943, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"learning_rate": 0.0007970967741935484, |
|
"loss": 0.2775, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 0.000795483870967742, |
|
"loss": 0.284, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 20.48, |
|
"learning_rate": 0.0007938709677419354, |
|
"loss": 0.2786, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 20.64, |
|
"learning_rate": 0.000792258064516129, |
|
"loss": 0.3094, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 0.0007906451612903227, |
|
"loss": 0.3377, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 0.0007890322580645162, |
|
"loss": 0.3094, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.8562570462232244, |
|
"eval_loss": 0.4004518687725067, |
|
"eval_runtime": 29.6498, |
|
"eval_samples_per_second": 59.832, |
|
"eval_steps_per_second": 0.944, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 21.12, |
|
"learning_rate": 0.0007874193548387098, |
|
"loss": 0.2895, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 0.0007858064516129032, |
|
"loss": 0.296, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 21.44, |
|
"learning_rate": 0.0007841935483870968, |
|
"loss": 0.275, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 0.0007825806451612903, |
|
"loss": 0.3178, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 0.0007809677419354839, |
|
"loss": 0.2489, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 21.92, |
|
"learning_rate": 0.0007793548387096773, |
|
"loss": 0.3263, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8494926719278467, |
|
"eval_loss": 0.4399039149284363, |
|
"eval_runtime": 29.8845, |
|
"eval_samples_per_second": 59.362, |
|
"eval_steps_per_second": 0.937, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 22.08, |
|
"learning_rate": 0.0007777419354838709, |
|
"loss": 0.2737, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 22.24, |
|
"learning_rate": 0.0007761290322580646, |
|
"loss": 0.2847, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 0.0007745161290322581, |
|
"loss": 0.2611, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 22.56, |
|
"learning_rate": 0.0007729032258064517, |
|
"loss": 0.2686, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 22.72, |
|
"learning_rate": 0.0007712903225806451, |
|
"loss": 0.2846, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"learning_rate": 0.0007696774193548387, |
|
"loss": 0.3009, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.8523111612175873, |
|
"eval_loss": 0.4121840298175812, |
|
"eval_runtime": 29.94, |
|
"eval_samples_per_second": 59.252, |
|
"eval_steps_per_second": 0.935, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 23.04, |
|
"learning_rate": 0.0007680645161290323, |
|
"loss": 0.2728, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 0.0007664516129032258, |
|
"loss": 0.2544, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"learning_rate": 0.0007648387096774194, |
|
"loss": 0.2788, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 23.52, |
|
"learning_rate": 0.0007632258064516129, |
|
"loss": 0.2538, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"learning_rate": 0.0007616129032258065, |
|
"loss": 0.2724, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 23.84, |
|
"learning_rate": 0.00076, |
|
"loss": 0.2891, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.0007583870967741936, |
|
"loss": 0.2804, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8562570462232244, |
|
"eval_loss": 0.429273396730423, |
|
"eval_runtime": 29.8298, |
|
"eval_samples_per_second": 59.471, |
|
"eval_steps_per_second": 0.939, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.16, |
|
"learning_rate": 0.000756774193548387, |
|
"loss": 0.2681, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 0.0007551612903225806, |
|
"loss": 0.2599, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 24.48, |
|
"learning_rate": 0.0007535483870967742, |
|
"loss": 0.2698, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 24.64, |
|
"learning_rate": 0.0007519354838709677, |
|
"loss": 0.2737, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 0.0007503225806451614, |
|
"loss": 0.264, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 24.96, |
|
"learning_rate": 0.0007487096774193548, |
|
"loss": 0.2516, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.8562570462232244, |
|
"eval_loss": 0.42893821001052856, |
|
"eval_runtime": 29.8289, |
|
"eval_samples_per_second": 59.473, |
|
"eval_steps_per_second": 0.939, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 25.12, |
|
"learning_rate": 0.0007470967741935484, |
|
"loss": 0.2671, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 25.28, |
|
"learning_rate": 0.000745483870967742, |
|
"loss": 0.2435, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 25.44, |
|
"learning_rate": 0.0007438709677419355, |
|
"loss": 0.2477, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 0.0007422580645161291, |
|
"loss": 0.2631, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 25.76, |
|
"learning_rate": 0.0007406451612903225, |
|
"loss": 0.2423, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 25.92, |
|
"learning_rate": 0.0007390322580645161, |
|
"loss": 0.2763, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8647125140924464, |
|
"eval_loss": 0.41249939799308777, |
|
"eval_runtime": 29.4606, |
|
"eval_samples_per_second": 60.216, |
|
"eval_steps_per_second": 0.95, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 26.08, |
|
"learning_rate": 0.0007374193548387097, |
|
"loss": 0.2435, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 26.24, |
|
"learning_rate": 0.0007358064516129033, |
|
"loss": 0.2289, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 0.0007341935483870969, |
|
"loss": 0.2346, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 26.56, |
|
"learning_rate": 0.0007325806451612903, |
|
"loss": 0.2556, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"learning_rate": 0.0007309677419354839, |
|
"loss": 0.2724, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"learning_rate": 0.0007293548387096774, |
|
"loss": 0.2707, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.8664036076662909, |
|
"eval_loss": 0.42308202385902405, |
|
"eval_runtime": 29.606, |
|
"eval_samples_per_second": 59.92, |
|
"eval_steps_per_second": 0.946, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"learning_rate": 0.000727741935483871, |
|
"loss": 0.265, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 0.0007261290322580644, |
|
"loss": 0.2335, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"learning_rate": 0.0007245161290322581, |
|
"loss": 0.2423, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 27.52, |
|
"learning_rate": 0.0007229032258064517, |
|
"loss": 0.2363, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"learning_rate": 0.0007212903225806452, |
|
"loss": 0.2543, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 27.84, |
|
"learning_rate": 0.0007196774193548388, |
|
"loss": 0.2564, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 0.0007180645161290322, |
|
"loss": 0.2585, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8596392333709132, |
|
"eval_loss": 0.420967698097229, |
|
"eval_runtime": 29.7608, |
|
"eval_samples_per_second": 59.609, |
|
"eval_steps_per_second": 0.941, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 28.16, |
|
"learning_rate": 0.0007164516129032258, |
|
"loss": 0.2505, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 28.32, |
|
"learning_rate": 0.0007148387096774193, |
|
"loss": 0.2182, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 28.48, |
|
"learning_rate": 0.0007132258064516129, |
|
"loss": 0.2261, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 28.64, |
|
"learning_rate": 0.0007116129032258066, |
|
"loss": 0.2171, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 0.00071, |
|
"loss": 0.2013, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"learning_rate": 0.0007083870967741936, |
|
"loss": 0.2317, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.8602029312288614, |
|
"eval_loss": 0.4295957386493683, |
|
"eval_runtime": 29.3564, |
|
"eval_samples_per_second": 60.43, |
|
"eval_steps_per_second": 0.954, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 0.0007067741935483871, |
|
"loss": 0.2085, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 29.28, |
|
"learning_rate": 0.0007051612903225807, |
|
"loss": 0.2212, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 29.44, |
|
"learning_rate": 0.0007035483870967741, |
|
"loss": 0.236, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"learning_rate": 0.0007019354838709677, |
|
"loss": 0.2215, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 29.76, |
|
"learning_rate": 0.0007003225806451613, |
|
"loss": 0.2208, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 29.92, |
|
"learning_rate": 0.0006987096774193549, |
|
"loss": 0.2118, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8635851183765502, |
|
"eval_loss": 0.44403260946273804, |
|
"eval_runtime": 29.7699, |
|
"eval_samples_per_second": 59.59, |
|
"eval_steps_per_second": 0.941, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 30.08, |
|
"learning_rate": 0.0006970967741935485, |
|
"loss": 0.218, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 30.24, |
|
"learning_rate": 0.0006954838709677419, |
|
"loss": 0.2068, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 0.0006938709677419355, |
|
"loss": 0.2266, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 30.56, |
|
"learning_rate": 0.000692258064516129, |
|
"loss": 0.2188, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 30.72, |
|
"learning_rate": 0.0006906451612903226, |
|
"loss": 0.2149, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 30.88, |
|
"learning_rate": 0.0006890322580645162, |
|
"loss": 0.2224, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_accuracy": 0.8726042841037204, |
|
"eval_loss": 0.39281362295150757, |
|
"eval_runtime": 28.941, |
|
"eval_samples_per_second": 61.297, |
|
"eval_steps_per_second": 0.967, |
|
"step": 1937 |
|
}, |
|
{ |
|
"epoch": 31.04, |
|
"learning_rate": 0.0006874193548387096, |
|
"loss": 0.1924, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"learning_rate": 0.0006858064516129032, |
|
"loss": 0.2095, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 31.36, |
|
"learning_rate": 0.0006841935483870968, |
|
"loss": 0.2074, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 31.52, |
|
"learning_rate": 0.0006825806451612904, |
|
"loss": 0.22, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 31.68, |
|
"learning_rate": 0.0006809677419354838, |
|
"loss": 0.2318, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 31.84, |
|
"learning_rate": 0.0006793548387096774, |
|
"loss": 0.2026, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 0.000677741935483871, |
|
"loss": 0.2166, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8602029312288614, |
|
"eval_loss": 0.424617737531662, |
|
"eval_runtime": 28.9601, |
|
"eval_samples_per_second": 61.257, |
|
"eval_steps_per_second": 0.967, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.16, |
|
"learning_rate": 0.0006761290322580645, |
|
"loss": 0.1975, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 32.32, |
|
"learning_rate": 0.0006745161290322581, |
|
"loss": 0.1867, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 32.48, |
|
"learning_rate": 0.0006729032258064515, |
|
"loss": 0.2128, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 32.64, |
|
"learning_rate": 0.0006712903225806452, |
|
"loss": 0.1861, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 0.0006696774193548388, |
|
"loss": 0.2282, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 32.96, |
|
"learning_rate": 0.0006680645161290323, |
|
"loss": 0.2038, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.870913190529876, |
|
"eval_loss": 0.41463395953178406, |
|
"eval_runtime": 29.1556, |
|
"eval_samples_per_second": 60.846, |
|
"eval_steps_per_second": 0.96, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 33.12, |
|
"learning_rate": 0.0006664516129032259, |
|
"loss": 0.1901, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 33.28, |
|
"learning_rate": 0.0006648387096774193, |
|
"loss": 0.2058, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"learning_rate": 0.0006632258064516129, |
|
"loss": 0.2051, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 0.0006616129032258064, |
|
"loss": 0.2176, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"learning_rate": 0.00066, |
|
"loss": 0.2031, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 33.92, |
|
"learning_rate": 0.0006583870967741937, |
|
"loss": 0.2183, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8697857948139797, |
|
"eval_loss": 0.416454941034317, |
|
"eval_runtime": 28.8978, |
|
"eval_samples_per_second": 61.389, |
|
"eval_steps_per_second": 0.969, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 34.08, |
|
"learning_rate": 0.0006567741935483871, |
|
"loss": 0.1896, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 34.24, |
|
"learning_rate": 0.0006551612903225807, |
|
"loss": 0.1851, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"learning_rate": 0.0006535483870967742, |
|
"loss": 0.1863, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 34.56, |
|
"learning_rate": 0.0006519354838709678, |
|
"loss": 0.1937, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 34.72, |
|
"learning_rate": 0.0006503225806451612, |
|
"loss": 0.1886, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 34.88, |
|
"learning_rate": 0.0006487096774193548, |
|
"loss": 0.22, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"eval_accuracy": 0.8765501691093573, |
|
"eval_loss": 0.42124196887016296, |
|
"eval_runtime": 28.9343, |
|
"eval_samples_per_second": 61.311, |
|
"eval_steps_per_second": 0.968, |
|
"step": 2187 |
|
}, |
|
{ |
|
"epoch": 35.04, |
|
"learning_rate": 0.0006470967741935484, |
|
"loss": 0.2051, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 0.000645483870967742, |
|
"loss": 0.1762, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 35.36, |
|
"learning_rate": 0.0006438709677419356, |
|
"loss": 0.2054, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 35.52, |
|
"learning_rate": 0.000642258064516129, |
|
"loss": 0.1593, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 35.68, |
|
"learning_rate": 0.0006406451612903226, |
|
"loss": 0.2051, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 35.84, |
|
"learning_rate": 0.0006390322580645161, |
|
"loss": 0.1926, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 0.0006374193548387097, |
|
"loss": 0.206, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8726042841037204, |
|
"eval_loss": 0.41393008828163147, |
|
"eval_runtime": 29.1092, |
|
"eval_samples_per_second": 60.943, |
|
"eval_steps_per_second": 0.962, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 36.16, |
|
"learning_rate": 0.0006358064516129033, |
|
"loss": 0.183, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 36.32, |
|
"learning_rate": 0.0006341935483870967, |
|
"loss": 0.1939, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 36.48, |
|
"learning_rate": 0.0006325806451612904, |
|
"loss": 0.1733, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 36.64, |
|
"learning_rate": 0.0006309677419354839, |
|
"loss": 0.1825, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 0.0006293548387096775, |
|
"loss": 0.2011, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"learning_rate": 0.0006277419354838709, |
|
"loss": 0.199, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_accuracy": 0.883314543404735, |
|
"eval_loss": 0.3792937695980072, |
|
"eval_runtime": 29.534, |
|
"eval_samples_per_second": 60.066, |
|
"eval_steps_per_second": 0.948, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 37.12, |
|
"learning_rate": 0.0006261290322580645, |
|
"loss": 0.1911, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 37.28, |
|
"learning_rate": 0.0006245161290322581, |
|
"loss": 0.192, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 37.44, |
|
"learning_rate": 0.0006229032258064516, |
|
"loss": 0.1849, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"learning_rate": 0.0006212903225806452, |
|
"loss": 0.1872, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 37.76, |
|
"learning_rate": 0.0006196774193548386, |
|
"loss": 0.1891, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 37.92, |
|
"learning_rate": 0.0006180645161290323, |
|
"loss": 0.1926, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8838782412626832, |
|
"eval_loss": 0.41269081830978394, |
|
"eval_runtime": 35.7673, |
|
"eval_samples_per_second": 49.598, |
|
"eval_steps_per_second": 0.783, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 38.08, |
|
"learning_rate": 0.0006164516129032258, |
|
"loss": 0.1762, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 38.24, |
|
"learning_rate": 0.0006148387096774194, |
|
"loss": 0.183, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 0.000613225806451613, |
|
"loss": 0.1815, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 38.56, |
|
"learning_rate": 0.0006116129032258064, |
|
"loss": 0.1943, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 38.72, |
|
"learning_rate": 0.00061, |
|
"loss": 0.1557, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 38.88, |
|
"learning_rate": 0.0006083870967741935, |
|
"loss": 0.1648, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_accuracy": 0.8821871476888388, |
|
"eval_loss": 0.42961573600769043, |
|
"eval_runtime": 29.689, |
|
"eval_samples_per_second": 59.753, |
|
"eval_steps_per_second": 0.943, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 39.04, |
|
"learning_rate": 0.0006067741935483871, |
|
"loss": 0.1698, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 39.2, |
|
"learning_rate": 0.0006051612903225806, |
|
"loss": 0.1508, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 39.36, |
|
"learning_rate": 0.0006035483870967742, |
|
"loss": 0.1622, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 39.52, |
|
"learning_rate": 0.0006019354838709678, |
|
"loss": 0.1719, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 39.68, |
|
"learning_rate": 0.0006003225806451613, |
|
"loss": 0.1916, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 39.84, |
|
"learning_rate": 0.0005987096774193549, |
|
"loss": 0.1853, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0005970967741935483, |
|
"loss": 0.1578, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.883314543404735, |
|
"eval_loss": 0.4131587743759155, |
|
"eval_runtime": 29.7946, |
|
"eval_samples_per_second": 59.541, |
|
"eval_steps_per_second": 0.94, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 40.16, |
|
"learning_rate": 0.0005954838709677419, |
|
"loss": 0.1591, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"learning_rate": 0.0005938709677419354, |
|
"loss": 0.1674, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 40.48, |
|
"learning_rate": 0.0005922580645161291, |
|
"loss": 0.1614, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 40.64, |
|
"learning_rate": 0.0005906451612903227, |
|
"loss": 0.1501, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"learning_rate": 0.0005890322580645161, |
|
"loss": 0.1819, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 40.96, |
|
"learning_rate": 0.0005874193548387097, |
|
"loss": 0.181, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_accuracy": 0.8776775648252536, |
|
"eval_loss": 0.4216878414154053, |
|
"eval_runtime": 29.999, |
|
"eval_samples_per_second": 59.135, |
|
"eval_steps_per_second": 0.933, |
|
"step": 2562 |
|
}, |
|
{ |
|
"epoch": 41.12, |
|
"learning_rate": 0.0005858064516129032, |
|
"loss": 0.179, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 41.28, |
|
"learning_rate": 0.0005841935483870968, |
|
"loss": 0.1754, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 41.44, |
|
"learning_rate": 0.0005825806451612904, |
|
"loss": 0.1836, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 0.0005809677419354838, |
|
"loss": 0.1896, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 41.76, |
|
"learning_rate": 0.0005793548387096775, |
|
"loss": 0.1587, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 41.92, |
|
"learning_rate": 0.000577741935483871, |
|
"loss": 0.1735, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8714768883878241, |
|
"eval_loss": 0.41855669021606445, |
|
"eval_runtime": 30.1587, |
|
"eval_samples_per_second": 58.822, |
|
"eval_steps_per_second": 0.928, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 42.08, |
|
"learning_rate": 0.0005761290322580646, |
|
"loss": 0.1581, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 42.24, |
|
"learning_rate": 0.000574516129032258, |
|
"loss": 0.1453, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"learning_rate": 0.0005729032258064516, |
|
"loss": 0.166, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 42.56, |
|
"learning_rate": 0.0005712903225806452, |
|
"loss": 0.1674, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 42.72, |
|
"learning_rate": 0.0005696774193548387, |
|
"loss": 0.1543, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 42.88, |
|
"learning_rate": 0.0005680645161290323, |
|
"loss": 0.1603, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"eval_accuracy": 0.8804960541149943, |
|
"eval_loss": 0.411676824092865, |
|
"eval_runtime": 29.9513, |
|
"eval_samples_per_second": 59.229, |
|
"eval_steps_per_second": 0.935, |
|
"step": 2687 |
|
}, |
|
{ |
|
"epoch": 43.04, |
|
"learning_rate": 0.0005664516129032258, |
|
"loss": 0.1608, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 0.0005648387096774194, |
|
"loss": 0.1442, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 43.36, |
|
"learning_rate": 0.0005632258064516129, |
|
"loss": 0.1625, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 43.52, |
|
"learning_rate": 0.0005616129032258065, |
|
"loss": 0.1844, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 43.68, |
|
"learning_rate": 0.0005600000000000001, |
|
"loss": 0.1512, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 43.84, |
|
"learning_rate": 0.0005583870967741935, |
|
"loss": 0.1777, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 0.0005567741935483871, |
|
"loss": 0.1516, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8816234498308907, |
|
"eval_loss": 0.424953430891037, |
|
"eval_runtime": 29.9383, |
|
"eval_samples_per_second": 59.255, |
|
"eval_steps_per_second": 0.935, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 44.16, |
|
"learning_rate": 0.0005551612903225806, |
|
"loss": 0.1486, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 44.32, |
|
"learning_rate": 0.0005535483870967743, |
|
"loss": 0.1565, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 44.48, |
|
"learning_rate": 0.0005519354838709677, |
|
"loss": 0.1871, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 44.64, |
|
"learning_rate": 0.0005503225806451613, |
|
"loss": 0.1471, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 0.0005487096774193549, |
|
"loss": 0.1569, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 44.96, |
|
"learning_rate": 0.0005470967741935484, |
|
"loss": 0.1733, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_accuracy": 0.8844419391206313, |
|
"eval_loss": 0.3913884162902832, |
|
"eval_runtime": 30.3051, |
|
"eval_samples_per_second": 58.538, |
|
"eval_steps_per_second": 0.924, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 45.12, |
|
"learning_rate": 0.000545483870967742, |
|
"loss": 0.1656, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"learning_rate": 0.0005438709677419354, |
|
"loss": 0.1761, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 45.44, |
|
"learning_rate": 0.000542258064516129, |
|
"loss": 0.156, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"learning_rate": 0.0005406451612903226, |
|
"loss": 0.1559, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 45.76, |
|
"learning_rate": 0.0005390322580645162, |
|
"loss": 0.1577, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 45.92, |
|
"learning_rate": 0.0005374193548387098, |
|
"loss": 0.164, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.882750845546787, |
|
"eval_loss": 0.4368877112865448, |
|
"eval_runtime": 29.8462, |
|
"eval_samples_per_second": 59.438, |
|
"eval_steps_per_second": 0.938, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 46.08, |
|
"learning_rate": 0.0005358064516129032, |
|
"loss": 0.1496, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 46.24, |
|
"learning_rate": 0.0005341935483870968, |
|
"loss": 0.146, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 0.0005325806451612903, |
|
"loss": 0.154, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 46.56, |
|
"learning_rate": 0.0005309677419354839, |
|
"loss": 0.1374, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 46.72, |
|
"learning_rate": 0.0005293548387096773, |
|
"loss": 0.1429, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"learning_rate": 0.0005277419354838709, |
|
"loss": 0.1519, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"eval_accuracy": 0.8771138669673055, |
|
"eval_loss": 0.4275626540184021, |
|
"eval_runtime": 28.9785, |
|
"eval_samples_per_second": 61.218, |
|
"eval_steps_per_second": 0.966, |
|
"step": 2937 |
|
}, |
|
{ |
|
"epoch": 47.04, |
|
"learning_rate": 0.0005261290322580646, |
|
"loss": 0.1551, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"learning_rate": 0.0005245161290322581, |
|
"loss": 0.1462, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 47.36, |
|
"learning_rate": 0.0005229032258064517, |
|
"loss": 0.156, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 47.52, |
|
"learning_rate": 0.0005212903225806451, |
|
"loss": 0.1605, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 47.68, |
|
"learning_rate": 0.0005196774193548387, |
|
"loss": 0.1502, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 47.84, |
|
"learning_rate": 0.0005180645161290322, |
|
"loss": 0.1584, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 0.0005164516129032258, |
|
"loss": 0.1534, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8821871476888388, |
|
"eval_loss": 0.4420623481273651, |
|
"eval_runtime": 29.0293, |
|
"eval_samples_per_second": 61.111, |
|
"eval_steps_per_second": 0.965, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 48.16, |
|
"learning_rate": 0.0005148387096774194, |
|
"loss": 0.1391, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 48.32, |
|
"learning_rate": 0.0005132258064516129, |
|
"loss": 0.1403, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 48.48, |
|
"learning_rate": 0.0005116129032258065, |
|
"loss": 0.1518, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 48.64, |
|
"learning_rate": 0.00051, |
|
"loss": 0.1225, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"learning_rate": 0.0005083870967741936, |
|
"loss": 0.1386, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 48.96, |
|
"learning_rate": 0.000506774193548387, |
|
"loss": 0.158, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"eval_accuracy": 0.887260428410372, |
|
"eval_loss": 0.4240320920944214, |
|
"eval_runtime": 29.0851, |
|
"eval_samples_per_second": 60.993, |
|
"eval_steps_per_second": 0.963, |
|
"step": 3062 |
|
}, |
|
{ |
|
"epoch": 49.12, |
|
"learning_rate": 0.0005051612903225806, |
|
"loss": 0.1512, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 49.28, |
|
"learning_rate": 0.0005035483870967742, |
|
"loss": 0.1547, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 49.44, |
|
"learning_rate": 0.0005019354838709677, |
|
"loss": 0.1441, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 0.0005003225806451614, |
|
"loss": 0.1515, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 49.76, |
|
"learning_rate": 0.0004987096774193548, |
|
"loss": 0.1486, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 49.92, |
|
"learning_rate": 0.0004970967741935484, |
|
"loss": 0.1531, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8793686583990981, |
|
"eval_loss": 0.42499276995658875, |
|
"eval_runtime": 28.8635, |
|
"eval_samples_per_second": 61.462, |
|
"eval_steps_per_second": 0.97, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 50.08, |
|
"learning_rate": 0.000495483870967742, |
|
"loss": 0.1479, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 50.24, |
|
"learning_rate": 0.0004938709677419355, |
|
"loss": 0.1335, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 50.4, |
|
"learning_rate": 0.0004922580645161291, |
|
"loss": 0.1371, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 50.56, |
|
"learning_rate": 0.0004906451612903226, |
|
"loss": 0.1207, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 50.72, |
|
"learning_rate": 0.0004890322580645161, |
|
"loss": 0.1377, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 50.88, |
|
"learning_rate": 0.0004874193548387097, |
|
"loss": 0.1286, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 50.99, |
|
"eval_accuracy": 0.8731679819616686, |
|
"eval_loss": 0.4228157103061676, |
|
"eval_runtime": 29.0583, |
|
"eval_samples_per_second": 61.05, |
|
"eval_steps_per_second": 0.964, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 51.04, |
|
"learning_rate": 0.0004858064516129032, |
|
"loss": 0.129, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"learning_rate": 0.0004841935483870968, |
|
"loss": 0.1243, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 51.36, |
|
"learning_rate": 0.00048258064516129036, |
|
"loss": 0.1601, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 51.52, |
|
"learning_rate": 0.00048096774193548387, |
|
"loss": 0.1425, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 51.68, |
|
"learning_rate": 0.0004793548387096774, |
|
"loss": 0.165, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 51.84, |
|
"learning_rate": 0.000477741935483871, |
|
"loss": 0.1281, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 0.00047612903225806454, |
|
"loss": 0.1396, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8782412626832018, |
|
"eval_loss": 0.43168801069259644, |
|
"eval_runtime": 28.9514, |
|
"eval_samples_per_second": 61.275, |
|
"eval_steps_per_second": 0.967, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 52.16, |
|
"learning_rate": 0.00047451612903225804, |
|
"loss": 0.1302, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 52.32, |
|
"learning_rate": 0.0004729032258064516, |
|
"loss": 0.1402, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 52.48, |
|
"learning_rate": 0.0004712903225806452, |
|
"loss": 0.1431, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 52.64, |
|
"learning_rate": 0.0004696774193548387, |
|
"loss": 0.1347, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"learning_rate": 0.0004680645161290323, |
|
"loss": 0.1198, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 52.96, |
|
"learning_rate": 0.0004664516129032258, |
|
"loss": 0.1436, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 52.99, |
|
"eval_accuracy": 0.8855693348365277, |
|
"eval_loss": 0.4360513389110565, |
|
"eval_runtime": 28.9788, |
|
"eval_samples_per_second": 61.217, |
|
"eval_steps_per_second": 0.966, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 53.12, |
|
"learning_rate": 0.0004648387096774194, |
|
"loss": 0.1304, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 53.28, |
|
"learning_rate": 0.0004632258064516129, |
|
"loss": 0.1353, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 53.44, |
|
"learning_rate": 0.00046161290322580646, |
|
"loss": 0.1354, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"learning_rate": 0.00046, |
|
"loss": 0.1217, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 53.76, |
|
"learning_rate": 0.00045838709677419357, |
|
"loss": 0.1266, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 53.92, |
|
"learning_rate": 0.00045677419354838713, |
|
"loss": 0.1411, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.8850056369785795, |
|
"eval_loss": 0.44017305970191956, |
|
"eval_runtime": 28.9504, |
|
"eval_samples_per_second": 61.277, |
|
"eval_steps_per_second": 0.967, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 54.08, |
|
"learning_rate": 0.00045516129032258063, |
|
"loss": 0.1574, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 54.24, |
|
"learning_rate": 0.0004535483870967742, |
|
"loss": 0.1372, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"learning_rate": 0.0004519354838709678, |
|
"loss": 0.1302, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 54.56, |
|
"learning_rate": 0.0004503225806451613, |
|
"loss": 0.132, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"learning_rate": 0.00044870967741935487, |
|
"loss": 0.122, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 54.88, |
|
"learning_rate": 0.00044709677419354837, |
|
"loss": 0.1312, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 54.99, |
|
"eval_accuracy": 0.8883878241262683, |
|
"eval_loss": 0.43266087770462036, |
|
"eval_runtime": 28.9781, |
|
"eval_samples_per_second": 61.219, |
|
"eval_steps_per_second": 0.966, |
|
"step": 3437 |
|
}, |
|
{ |
|
"epoch": 55.04, |
|
"learning_rate": 0.00044548387096774193, |
|
"loss": 0.1421, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 55.2, |
|
"learning_rate": 0.0004438709677419355, |
|
"loss": 0.1223, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 55.36, |
|
"learning_rate": 0.00044225806451612905, |
|
"loss": 0.1147, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 55.52, |
|
"learning_rate": 0.0004406451612903226, |
|
"loss": 0.1318, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 55.68, |
|
"learning_rate": 0.0004390322580645161, |
|
"loss": 0.1324, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 55.84, |
|
"learning_rate": 0.0004374193548387097, |
|
"loss": 0.1436, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 0.0004358064516129032, |
|
"loss": 0.1359, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8855693348365277, |
|
"eval_loss": 0.41437384486198425, |
|
"eval_runtime": 29.1061, |
|
"eval_samples_per_second": 60.949, |
|
"eval_steps_per_second": 0.962, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 56.16, |
|
"learning_rate": 0.0004341935483870968, |
|
"loss": 0.1191, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 56.32, |
|
"learning_rate": 0.0004325806451612903, |
|
"loss": 0.1179, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 56.48, |
|
"learning_rate": 0.0004309677419354839, |
|
"loss": 0.1278, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 56.64, |
|
"learning_rate": 0.00042935483870967746, |
|
"loss": 0.1383, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"learning_rate": 0.00042774193548387096, |
|
"loss": 0.1281, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 56.96, |
|
"learning_rate": 0.0004261290322580645, |
|
"loss": 0.1361, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"eval_accuracy": 0.8866967305524239, |
|
"eval_loss": 0.4180738031864166, |
|
"eval_runtime": 29.0289, |
|
"eval_samples_per_second": 61.112, |
|
"eval_steps_per_second": 0.965, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 57.12, |
|
"learning_rate": 0.0004245161290322581, |
|
"loss": 0.1415, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 57.28, |
|
"learning_rate": 0.00042290322580645163, |
|
"loss": 0.1205, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 57.44, |
|
"learning_rate": 0.00042129032258064514, |
|
"loss": 0.1225, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"learning_rate": 0.0004196774193548387, |
|
"loss": 0.1222, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 57.76, |
|
"learning_rate": 0.0004180645161290323, |
|
"loss": 0.1174, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 57.92, |
|
"learning_rate": 0.0004164516129032258, |
|
"loss": 0.1272, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8878241262683202, |
|
"eval_loss": 0.4203573763370514, |
|
"eval_runtime": 28.9973, |
|
"eval_samples_per_second": 61.178, |
|
"eval_steps_per_second": 0.966, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 58.08, |
|
"learning_rate": 0.00041483870967741937, |
|
"loss": 0.1359, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 58.24, |
|
"learning_rate": 0.0004132258064516129, |
|
"loss": 0.1267, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 58.4, |
|
"learning_rate": 0.0004116129032258065, |
|
"loss": 0.1251, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 58.56, |
|
"learning_rate": 0.00041, |
|
"loss": 0.1107, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 58.72, |
|
"learning_rate": 0.00040838709677419355, |
|
"loss": 0.1147, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 58.88, |
|
"learning_rate": 0.0004067741935483871, |
|
"loss": 0.1222, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 58.99, |
|
"eval_accuracy": 0.8883878241262683, |
|
"eval_loss": 0.4137117862701416, |
|
"eval_runtime": 28.9138, |
|
"eval_samples_per_second": 61.355, |
|
"eval_steps_per_second": 0.968, |
|
"step": 3687 |
|
}, |
|
{ |
|
"epoch": 59.04, |
|
"learning_rate": 0.00040516129032258067, |
|
"loss": 0.1276, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"learning_rate": 0.0004035483870967742, |
|
"loss": 0.1144, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 59.36, |
|
"learning_rate": 0.00040193548387096773, |
|
"loss": 0.1372, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 59.52, |
|
"learning_rate": 0.0004003225806451613, |
|
"loss": 0.1258, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 59.68, |
|
"learning_rate": 0.00039870967741935484, |
|
"loss": 0.1208, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 59.84, |
|
"learning_rate": 0.0003970967741935484, |
|
"loss": 0.1129, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00039548387096774196, |
|
"loss": 0.1272, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8889515219842165, |
|
"eval_loss": 0.4316939115524292, |
|
"eval_runtime": 29.1565, |
|
"eval_samples_per_second": 60.844, |
|
"eval_steps_per_second": 0.96, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 60.16, |
|
"learning_rate": 0.00039387096774193546, |
|
"loss": 0.1011, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 60.32, |
|
"learning_rate": 0.0003922580645161291, |
|
"loss": 0.1191, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 60.48, |
|
"learning_rate": 0.0003906451612903226, |
|
"loss": 0.1279, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 60.64, |
|
"learning_rate": 0.00038903225806451614, |
|
"loss": 0.1138, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"learning_rate": 0.00038741935483870964, |
|
"loss": 0.1091, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 60.96, |
|
"learning_rate": 0.00038580645161290325, |
|
"loss": 0.1132, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 60.99, |
|
"eval_accuracy": 0.8917700112739572, |
|
"eval_loss": 0.43509015440940857, |
|
"eval_runtime": 29.022, |
|
"eval_samples_per_second": 61.126, |
|
"eval_steps_per_second": 0.965, |
|
"step": 3812 |
|
}, |
|
{ |
|
"epoch": 61.12, |
|
"learning_rate": 0.0003841935483870968, |
|
"loss": 0.1279, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 61.28, |
|
"learning_rate": 0.0003825806451612903, |
|
"loss": 0.1127, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 61.44, |
|
"learning_rate": 0.0003809677419354839, |
|
"loss": 0.1205, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 61.6, |
|
"learning_rate": 0.00037935483870967743, |
|
"loss": 0.1224, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 61.76, |
|
"learning_rate": 0.000377741935483871, |
|
"loss": 0.1191, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 61.92, |
|
"learning_rate": 0.0003761290322580645, |
|
"loss": 0.1239, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.882750845546787, |
|
"eval_loss": 0.43482401967048645, |
|
"eval_runtime": 29.0447, |
|
"eval_samples_per_second": 61.078, |
|
"eval_steps_per_second": 0.964, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 62.08, |
|
"learning_rate": 0.00037451612903225805, |
|
"loss": 0.1195, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 62.24, |
|
"learning_rate": 0.00037290322580645167, |
|
"loss": 0.1266, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"learning_rate": 0.00037129032258064517, |
|
"loss": 0.1219, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 62.56, |
|
"learning_rate": 0.00036967741935483873, |
|
"loss": 0.1191, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 62.72, |
|
"learning_rate": 0.00036806451612903223, |
|
"loss": 0.116, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 62.88, |
|
"learning_rate": 0.00036645161290322584, |
|
"loss": 0.1188, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 62.99, |
|
"eval_accuracy": 0.8861330326944757, |
|
"eval_loss": 0.42578133940696716, |
|
"eval_runtime": 29.0436, |
|
"eval_samples_per_second": 61.081, |
|
"eval_steps_per_second": 0.964, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 63.04, |
|
"learning_rate": 0.0003648387096774194, |
|
"loss": 0.1066, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 63.2, |
|
"learning_rate": 0.0003632258064516129, |
|
"loss": 0.1082, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 63.36, |
|
"learning_rate": 0.00036161290322580646, |
|
"loss": 0.0991, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 63.52, |
|
"learning_rate": 0.00035999999999999997, |
|
"loss": 0.1223, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 63.68, |
|
"learning_rate": 0.0003583870967741936, |
|
"loss": 0.1219, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 63.84, |
|
"learning_rate": 0.0003567741935483871, |
|
"loss": 0.1172, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 0.00035516129032258064, |
|
"loss": 0.1203, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.891206313416009, |
|
"eval_loss": 0.43181148171424866, |
|
"eval_runtime": 28.9224, |
|
"eval_samples_per_second": 61.337, |
|
"eval_steps_per_second": 0.968, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 64.16, |
|
"learning_rate": 0.0003535483870967742, |
|
"loss": 0.1237, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 64.32, |
|
"learning_rate": 0.00035193548387096776, |
|
"loss": 0.1162, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 64.48, |
|
"learning_rate": 0.0003503225806451613, |
|
"loss": 0.1224, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 64.64, |
|
"learning_rate": 0.0003487096774193548, |
|
"loss": 0.1086, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"learning_rate": 0.0003470967741935484, |
|
"loss": 0.1034, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 64.96, |
|
"learning_rate": 0.00034548387096774194, |
|
"loss": 0.1204, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"eval_accuracy": 0.895152198421646, |
|
"eval_loss": 0.4054950177669525, |
|
"eval_runtime": 29.0688, |
|
"eval_samples_per_second": 61.028, |
|
"eval_steps_per_second": 0.963, |
|
"step": 4062 |
|
}, |
|
{ |
|
"epoch": 65.12, |
|
"learning_rate": 0.0003438709677419355, |
|
"loss": 0.107, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 65.28, |
|
"learning_rate": 0.00034225806451612905, |
|
"loss": 0.1162, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 65.44, |
|
"learning_rate": 0.00034064516129032256, |
|
"loss": 0.1244, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"learning_rate": 0.00033903225806451617, |
|
"loss": 0.0922, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 65.76, |
|
"learning_rate": 0.0003374193548387097, |
|
"loss": 0.1148, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 65.92, |
|
"learning_rate": 0.00033580645161290323, |
|
"loss": 0.1053, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.8917700112739572, |
|
"eval_loss": 0.4222296476364136, |
|
"eval_runtime": 29.1218, |
|
"eval_samples_per_second": 60.917, |
|
"eval_steps_per_second": 0.961, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 66.08, |
|
"learning_rate": 0.00033419354838709674, |
|
"loss": 0.1089, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 66.24, |
|
"learning_rate": 0.00033258064516129035, |
|
"loss": 0.1069, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 66.4, |
|
"learning_rate": 0.0003309677419354839, |
|
"loss": 0.1049, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 66.56, |
|
"learning_rate": 0.0003293548387096774, |
|
"loss": 0.1042, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 66.72, |
|
"learning_rate": 0.00032774193548387097, |
|
"loss": 0.1095, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 66.88, |
|
"learning_rate": 0.0003261290322580645, |
|
"loss": 0.1187, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 66.99, |
|
"eval_accuracy": 0.8945885005636979, |
|
"eval_loss": 0.4248427152633667, |
|
"eval_runtime": 29.0376, |
|
"eval_samples_per_second": 61.093, |
|
"eval_steps_per_second": 0.964, |
|
"step": 4187 |
|
}, |
|
{ |
|
"epoch": 67.04, |
|
"learning_rate": 0.0003245161290322581, |
|
"loss": 0.1181, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"learning_rate": 0.0003229032258064516, |
|
"loss": 0.1154, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 67.36, |
|
"learning_rate": 0.00032129032258064515, |
|
"loss": 0.1186, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 67.52, |
|
"learning_rate": 0.00031967741935483876, |
|
"loss": 0.122, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 67.68, |
|
"learning_rate": 0.00031806451612903226, |
|
"loss": 0.107, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 67.84, |
|
"learning_rate": 0.0003164516129032258, |
|
"loss": 0.0939, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 0.0003148387096774193, |
|
"loss": 0.1129, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8923337091319054, |
|
"eval_loss": 0.4301997125148773, |
|
"eval_runtime": 28.9069, |
|
"eval_samples_per_second": 61.369, |
|
"eval_steps_per_second": 0.969, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 68.16, |
|
"learning_rate": 0.00031322580645161294, |
|
"loss": 0.097, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 68.32, |
|
"learning_rate": 0.00031161290322580644, |
|
"loss": 0.0899, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 68.48, |
|
"learning_rate": 0.00031, |
|
"loss": 0.1098, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 68.64, |
|
"learning_rate": 0.00030838709677419356, |
|
"loss": 0.1109, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"learning_rate": 0.0003067741935483871, |
|
"loss": 0.1023, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 68.96, |
|
"learning_rate": 0.0003051612903225807, |
|
"loss": 0.1117, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 68.99, |
|
"eval_accuracy": 0.8968432919954904, |
|
"eval_loss": 0.414861261844635, |
|
"eval_runtime": 29.0625, |
|
"eval_samples_per_second": 61.041, |
|
"eval_steps_per_second": 0.963, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 69.12, |
|
"learning_rate": 0.0003035483870967742, |
|
"loss": 0.1158, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 69.28, |
|
"learning_rate": 0.00030193548387096774, |
|
"loss": 0.1051, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 69.44, |
|
"learning_rate": 0.0003003225806451613, |
|
"loss": 0.1004, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 69.6, |
|
"learning_rate": 0.00029870967741935485, |
|
"loss": 0.1205, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 69.76, |
|
"learning_rate": 0.0002970967741935484, |
|
"loss": 0.1114, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 69.92, |
|
"learning_rate": 0.0002954838709677419, |
|
"loss": 0.1194, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8895152198421646, |
|
"eval_loss": 0.41601237654685974, |
|
"eval_runtime": 29.1078, |
|
"eval_samples_per_second": 60.946, |
|
"eval_steps_per_second": 0.962, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 70.08, |
|
"learning_rate": 0.00029387096774193553, |
|
"loss": 0.0931, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 70.24, |
|
"learning_rate": 0.00029225806451612903, |
|
"loss": 0.107, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"learning_rate": 0.0002906451612903226, |
|
"loss": 0.1052, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 70.56, |
|
"learning_rate": 0.00028903225806451615, |
|
"loss": 0.1057, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 70.72, |
|
"learning_rate": 0.0002874193548387097, |
|
"loss": 0.1104, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 70.88, |
|
"learning_rate": 0.00028580645161290326, |
|
"loss": 0.1003, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 70.99, |
|
"eval_accuracy": 0.8945885005636979, |
|
"eval_loss": 0.425587922334671, |
|
"eval_runtime": 29.0303, |
|
"eval_samples_per_second": 61.109, |
|
"eval_steps_per_second": 0.965, |
|
"step": 4437 |
|
}, |
|
{ |
|
"epoch": 71.04, |
|
"learning_rate": 0.00028419354838709677, |
|
"loss": 0.1053, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 71.2, |
|
"learning_rate": 0.0002825806451612903, |
|
"loss": 0.0978, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 71.36, |
|
"learning_rate": 0.00028096774193548383, |
|
"loss": 0.0927, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 71.52, |
|
"learning_rate": 0.00027935483870967744, |
|
"loss": 0.1161, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 71.68, |
|
"learning_rate": 0.000277741935483871, |
|
"loss": 0.1061, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 71.84, |
|
"learning_rate": 0.0002761290322580645, |
|
"loss": 0.1018, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 0.00027451612903225806, |
|
"loss": 0.1088, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8917700112739572, |
|
"eval_loss": 0.4356169104576111, |
|
"eval_runtime": 29.136, |
|
"eval_samples_per_second": 60.887, |
|
"eval_steps_per_second": 0.961, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 72.16, |
|
"learning_rate": 0.0002729032258064516, |
|
"loss": 0.0891, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 72.32, |
|
"learning_rate": 0.0002712903225806452, |
|
"loss": 0.1021, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 72.48, |
|
"learning_rate": 0.0002696774193548387, |
|
"loss": 0.0923, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"learning_rate": 0.00026806451612903224, |
|
"loss": 0.0987, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"learning_rate": 0.00026645161290322585, |
|
"loss": 0.0909, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 72.96, |
|
"learning_rate": 0.00026483870967741936, |
|
"loss": 0.11, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"eval_accuracy": 0.8934611048478016, |
|
"eval_loss": 0.42773857712745667, |
|
"eval_runtime": 29.2049, |
|
"eval_samples_per_second": 60.743, |
|
"eval_steps_per_second": 0.959, |
|
"step": 4562 |
|
}, |
|
{ |
|
"epoch": 73.12, |
|
"learning_rate": 0.0002632258064516129, |
|
"loss": 0.0995, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 73.28, |
|
"learning_rate": 0.0002616129032258064, |
|
"loss": 0.1036, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 73.44, |
|
"learning_rate": 0.00026000000000000003, |
|
"loss": 0.1089, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"learning_rate": 0.00025838709677419354, |
|
"loss": 0.093, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 73.76, |
|
"learning_rate": 0.0002567741935483871, |
|
"loss": 0.1, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 73.92, |
|
"learning_rate": 0.00025516129032258065, |
|
"loss": 0.1016, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.895152198421646, |
|
"eval_loss": 0.4094755947589874, |
|
"eval_runtime": 28.9726, |
|
"eval_samples_per_second": 61.23, |
|
"eval_steps_per_second": 0.966, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 74.08, |
|
"learning_rate": 0.0002535483870967742, |
|
"loss": 0.1106, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 74.24, |
|
"learning_rate": 0.00025193548387096777, |
|
"loss": 0.1058, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 74.4, |
|
"learning_rate": 0.00025032258064516127, |
|
"loss": 0.1011, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 74.56, |
|
"learning_rate": 0.00024870967741935483, |
|
"loss": 0.0881, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 74.72, |
|
"learning_rate": 0.0002470967741935484, |
|
"loss": 0.0912, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 74.88, |
|
"learning_rate": 0.00024548387096774195, |
|
"loss": 0.0906, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"eval_accuracy": 0.8934611048478016, |
|
"eval_loss": 0.4261699616909027, |
|
"eval_runtime": 29.3184, |
|
"eval_samples_per_second": 60.508, |
|
"eval_steps_per_second": 0.955, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 75.04, |
|
"learning_rate": 0.00024387096774193548, |
|
"loss": 0.0994, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 75.2, |
|
"learning_rate": 0.00024225806451612904, |
|
"loss": 0.0971, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 75.36, |
|
"learning_rate": 0.00024064516129032257, |
|
"loss": 0.1087, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 75.52, |
|
"learning_rate": 0.00023903225806451615, |
|
"loss": 0.105, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 75.68, |
|
"learning_rate": 0.00023741935483870968, |
|
"loss": 0.0975, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 75.84, |
|
"learning_rate": 0.00023580645161290324, |
|
"loss": 0.1142, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 0.00023419354838709677, |
|
"loss": 0.0969, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.8940248027057497, |
|
"eval_loss": 0.4057069718837738, |
|
"eval_runtime": 28.9742, |
|
"eval_samples_per_second": 61.227, |
|
"eval_steps_per_second": 0.966, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 76.16, |
|
"learning_rate": 0.00023258064516129033, |
|
"loss": 0.1035, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 76.32, |
|
"learning_rate": 0.00023096774193548386, |
|
"loss": 0.0803, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 76.48, |
|
"learning_rate": 0.00022935483870967742, |
|
"loss": 0.0898, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 76.64, |
|
"learning_rate": 0.00022774193548387098, |
|
"loss": 0.1007, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"learning_rate": 0.00022612903225806454, |
|
"loss": 0.1113, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 76.96, |
|
"learning_rate": 0.00022451612903225807, |
|
"loss": 0.111, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"eval_accuracy": 0.8996617812852311, |
|
"eval_loss": 0.40985479950904846, |
|
"eval_runtime": 29.09, |
|
"eval_samples_per_second": 60.983, |
|
"eval_steps_per_second": 0.963, |
|
"step": 4812 |
|
}, |
|
{ |
|
"epoch": 77.12, |
|
"learning_rate": 0.00022290322580645162, |
|
"loss": 0.118, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 77.28, |
|
"learning_rate": 0.00022129032258064516, |
|
"loss": 0.0928, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 77.44, |
|
"learning_rate": 0.00021967741935483871, |
|
"loss": 0.0967, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 77.6, |
|
"learning_rate": 0.00021806451612903227, |
|
"loss": 0.0979, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 77.76, |
|
"learning_rate": 0.00021645161290322583, |
|
"loss": 0.0879, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 77.92, |
|
"learning_rate": 0.00021483870967741936, |
|
"loss": 0.091, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.8962795941375423, |
|
"eval_loss": 0.42323029041290283, |
|
"eval_runtime": 29.0776, |
|
"eval_samples_per_second": 61.009, |
|
"eval_steps_per_second": 0.963, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 78.08, |
|
"learning_rate": 0.00021322580645161292, |
|
"loss": 0.0962, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 78.24, |
|
"learning_rate": 0.00021161290322580645, |
|
"loss": 0.105, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 78.4, |
|
"learning_rate": 0.00021, |
|
"loss": 0.099, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 78.56, |
|
"learning_rate": 0.00020838709677419354, |
|
"loss": 0.1132, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 78.72, |
|
"learning_rate": 0.00020677419354838713, |
|
"loss": 0.0909, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 78.88, |
|
"learning_rate": 0.00020516129032258066, |
|
"loss": 0.1013, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 78.99, |
|
"eval_accuracy": 0.8883878241262683, |
|
"eval_loss": 0.4311448037624359, |
|
"eval_runtime": 28.999, |
|
"eval_samples_per_second": 61.174, |
|
"eval_steps_per_second": 0.966, |
|
"step": 4937 |
|
}, |
|
{ |
|
"epoch": 79.04, |
|
"learning_rate": 0.00020354838709677421, |
|
"loss": 0.0946, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 79.2, |
|
"learning_rate": 0.00020193548387096775, |
|
"loss": 0.0842, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 79.36, |
|
"learning_rate": 0.00020032258064516128, |
|
"loss": 0.1172, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 79.52, |
|
"learning_rate": 0.00019870967741935483, |
|
"loss": 0.0936, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 79.68, |
|
"learning_rate": 0.00019709677419354837, |
|
"loss": 0.0993, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 79.84, |
|
"learning_rate": 0.00019548387096774195, |
|
"loss": 0.1049, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.00019387096774193548, |
|
"loss": 0.119, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8928974069898534, |
|
"eval_loss": 0.43020305037498474, |
|
"eval_runtime": 29.1182, |
|
"eval_samples_per_second": 60.924, |
|
"eval_steps_per_second": 0.962, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 80.16, |
|
"learning_rate": 0.00019225806451612904, |
|
"loss": 0.1009, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 80.32, |
|
"learning_rate": 0.00019064516129032257, |
|
"loss": 0.099, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 80.48, |
|
"learning_rate": 0.00018903225806451613, |
|
"loss": 0.0966, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 80.64, |
|
"learning_rate": 0.00018741935483870966, |
|
"loss": 0.1195, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 80.8, |
|
"learning_rate": 0.00018580645161290325, |
|
"loss": 0.0997, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 80.96, |
|
"learning_rate": 0.00018419354838709678, |
|
"loss": 0.0877, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 80.99, |
|
"eval_accuracy": 0.8923337091319054, |
|
"eval_loss": 0.4369201958179474, |
|
"eval_runtime": 29.0539, |
|
"eval_samples_per_second": 61.059, |
|
"eval_steps_per_second": 0.964, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 81.12, |
|
"learning_rate": 0.00018258064516129033, |
|
"loss": 0.1046, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 81.28, |
|
"learning_rate": 0.00018096774193548387, |
|
"loss": 0.1, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 81.44, |
|
"learning_rate": 0.00017935483870967742, |
|
"loss": 0.0967, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 81.6, |
|
"learning_rate": 0.00017774193548387095, |
|
"loss": 0.0919, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 81.76, |
|
"learning_rate": 0.0001761290322580645, |
|
"loss": 0.1022, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 81.92, |
|
"learning_rate": 0.00017451612903225807, |
|
"loss": 0.0926, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.8968432919954904, |
|
"eval_loss": 0.4353014826774597, |
|
"eval_runtime": 28.9657, |
|
"eval_samples_per_second": 61.245, |
|
"eval_steps_per_second": 0.967, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"learning_rate": 0.00017290322580645163, |
|
"loss": 0.0967, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 82.24, |
|
"learning_rate": 0.00017129032258064516, |
|
"loss": 0.0901, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 82.4, |
|
"learning_rate": 0.00016967741935483872, |
|
"loss": 0.1059, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 82.56, |
|
"learning_rate": 0.00016806451612903225, |
|
"loss": 0.0999, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 82.72, |
|
"learning_rate": 0.0001664516129032258, |
|
"loss": 0.0993, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 82.88, |
|
"learning_rate": 0.00016483870967741934, |
|
"loss": 0.0969, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 82.99, |
|
"eval_accuracy": 0.895152198421646, |
|
"eval_loss": 0.4335944652557373, |
|
"eval_runtime": 29.177, |
|
"eval_samples_per_second": 60.801, |
|
"eval_steps_per_second": 0.96, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 83.04, |
|
"learning_rate": 0.00016322580645161292, |
|
"loss": 0.088, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 83.2, |
|
"learning_rate": 0.00016161290322580645, |
|
"loss": 0.08, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 83.36, |
|
"learning_rate": 0.00016, |
|
"loss": 0.0899, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 83.52, |
|
"learning_rate": 0.00015838709677419354, |
|
"loss": 0.0938, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 83.68, |
|
"learning_rate": 0.0001567741935483871, |
|
"loss": 0.0986, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 83.84, |
|
"learning_rate": 0.00015516129032258063, |
|
"loss": 0.0947, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 0.0001535483870967742, |
|
"loss": 0.092, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.8934611048478016, |
|
"eval_loss": 0.4213511347770691, |
|
"eval_runtime": 28.9325, |
|
"eval_samples_per_second": 61.315, |
|
"eval_steps_per_second": 0.968, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 84.16, |
|
"learning_rate": 0.00015193548387096775, |
|
"loss": 0.0881, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 84.32, |
|
"learning_rate": 0.0001503225806451613, |
|
"loss": 0.0864, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 84.48, |
|
"learning_rate": 0.00014870967741935484, |
|
"loss": 0.0917, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 84.64, |
|
"learning_rate": 0.0001470967741935484, |
|
"loss": 0.0817, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 84.8, |
|
"learning_rate": 0.00014548387096774193, |
|
"loss": 0.1022, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 84.96, |
|
"learning_rate": 0.00014387096774193549, |
|
"loss": 0.0914, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"eval_accuracy": 0.8889515219842165, |
|
"eval_loss": 0.4403364360332489, |
|
"eval_runtime": 28.9736, |
|
"eval_samples_per_second": 61.228, |
|
"eval_steps_per_second": 0.966, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 85.12, |
|
"learning_rate": 0.00014225806451612904, |
|
"loss": 0.0802, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 85.28, |
|
"learning_rate": 0.0001406451612903226, |
|
"loss": 0.0943, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 85.44, |
|
"learning_rate": 0.00013903225806451613, |
|
"loss": 0.1016, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 85.6, |
|
"learning_rate": 0.0001374193548387097, |
|
"loss": 0.1008, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 85.76, |
|
"learning_rate": 0.00013596774193548386, |
|
"loss": 0.0812, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 85.92, |
|
"learning_rate": 0.00013435483870967744, |
|
"loss": 0.0924, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8928974069898534, |
|
"eval_loss": 0.4285109043121338, |
|
"eval_runtime": 28.8946, |
|
"eval_samples_per_second": 61.395, |
|
"eval_steps_per_second": 0.969, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 86.08, |
|
"learning_rate": 0.00013274193548387097, |
|
"loss": 0.0871, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 86.24, |
|
"learning_rate": 0.00013112903225806453, |
|
"loss": 0.0741, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 86.4, |
|
"learning_rate": 0.00012951612903225806, |
|
"loss": 0.0924, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 86.56, |
|
"learning_rate": 0.00012790322580645162, |
|
"loss": 0.0943, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 86.72, |
|
"learning_rate": 0.00012629032258064515, |
|
"loss": 0.0796, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 86.88, |
|
"learning_rate": 0.0001246774193548387, |
|
"loss": 0.0964, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 86.99, |
|
"eval_accuracy": 0.8968432919954904, |
|
"eval_loss": 0.4207240641117096, |
|
"eval_runtime": 28.9818, |
|
"eval_samples_per_second": 61.211, |
|
"eval_steps_per_second": 0.966, |
|
"step": 5437 |
|
}, |
|
{ |
|
"epoch": 87.04, |
|
"learning_rate": 0.00012306451612903227, |
|
"loss": 0.0912, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 87.2, |
|
"learning_rate": 0.0001214516129032258, |
|
"loss": 0.0889, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 87.36, |
|
"learning_rate": 0.00011983870967741936, |
|
"loss": 0.1015, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 87.52, |
|
"learning_rate": 0.0001182258064516129, |
|
"loss": 0.0959, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 87.68, |
|
"learning_rate": 0.00011661290322580645, |
|
"loss": 0.0663, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 87.84, |
|
"learning_rate": 0.000115, |
|
"loss": 0.0792, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 0.00011338709677419355, |
|
"loss": 0.0916, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.8945885005636979, |
|
"eval_loss": 0.42535075545310974, |
|
"eval_runtime": 29.0954, |
|
"eval_samples_per_second": 60.972, |
|
"eval_steps_per_second": 0.962, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 88.16, |
|
"learning_rate": 0.00011177419354838709, |
|
"loss": 0.0945, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 88.32, |
|
"learning_rate": 0.00011016129032258065, |
|
"loss": 0.0906, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 88.48, |
|
"learning_rate": 0.0001085483870967742, |
|
"loss": 0.0795, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 88.64, |
|
"learning_rate": 0.00010693548387096774, |
|
"loss": 0.0868, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 88.8, |
|
"learning_rate": 0.00010532258064516128, |
|
"loss": 0.1157, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 88.96, |
|
"learning_rate": 0.00010370967741935484, |
|
"loss": 0.0962, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"eval_accuracy": 0.8979706877113867, |
|
"eval_loss": 0.42487961053848267, |
|
"eval_runtime": 28.9603, |
|
"eval_samples_per_second": 61.256, |
|
"eval_steps_per_second": 0.967, |
|
"step": 5562 |
|
}, |
|
{ |
|
"epoch": 89.12, |
|
"learning_rate": 0.00010209677419354839, |
|
"loss": 0.0886, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 89.28, |
|
"learning_rate": 0.00010048387096774193, |
|
"loss": 0.0987, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 89.44, |
|
"learning_rate": 9.887096774193549e-05, |
|
"loss": 0.0893, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 89.6, |
|
"learning_rate": 9.725806451612903e-05, |
|
"loss": 0.0829, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 89.76, |
|
"learning_rate": 9.564516129032258e-05, |
|
"loss": 0.0913, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 89.92, |
|
"learning_rate": 9.403225806451612e-05, |
|
"loss": 0.0927, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.8934611048478016, |
|
"eval_loss": 0.42423465847969055, |
|
"eval_runtime": 29.0096, |
|
"eval_samples_per_second": 61.152, |
|
"eval_steps_per_second": 0.965, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 90.08, |
|
"learning_rate": 9.241935483870968e-05, |
|
"loss": 0.0781, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 90.24, |
|
"learning_rate": 9.080645161290323e-05, |
|
"loss": 0.0949, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 90.4, |
|
"learning_rate": 8.919354838709677e-05, |
|
"loss": 0.0794, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 90.56, |
|
"learning_rate": 8.758064516129033e-05, |
|
"loss": 0.077, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 90.72, |
|
"learning_rate": 8.596774193548387e-05, |
|
"loss": 0.0918, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 90.88, |
|
"learning_rate": 8.435483870967742e-05, |
|
"loss": 0.0993, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 90.99, |
|
"eval_accuracy": 0.8985343855693348, |
|
"eval_loss": 0.423031747341156, |
|
"eval_runtime": 29.0485, |
|
"eval_samples_per_second": 61.07, |
|
"eval_steps_per_second": 0.964, |
|
"step": 5687 |
|
}, |
|
{ |
|
"epoch": 91.04, |
|
"learning_rate": 8.274193548387098e-05, |
|
"loss": 0.0744, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 91.2, |
|
"learning_rate": 8.112903225806452e-05, |
|
"loss": 0.0964, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 91.36, |
|
"learning_rate": 7.951612903225807e-05, |
|
"loss": 0.0918, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 91.52, |
|
"learning_rate": 7.790322580645161e-05, |
|
"loss": 0.0899, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 91.68, |
|
"learning_rate": 7.629032258064517e-05, |
|
"loss": 0.091, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 91.84, |
|
"learning_rate": 7.467741935483871e-05, |
|
"loss": 0.0965, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 7.306451612903226e-05, |
|
"loss": 0.0893, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.8979706877113867, |
|
"eval_loss": 0.4228670597076416, |
|
"eval_runtime": 28.916, |
|
"eval_samples_per_second": 61.35, |
|
"eval_steps_per_second": 0.968, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 92.16, |
|
"learning_rate": 7.145161290322582e-05, |
|
"loss": 0.0952, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 92.32, |
|
"learning_rate": 6.983870967741936e-05, |
|
"loss": 0.0878, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 92.48, |
|
"learning_rate": 6.82258064516129e-05, |
|
"loss": 0.0769, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 92.64, |
|
"learning_rate": 6.661290322580646e-05, |
|
"loss": 0.0854, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 92.8, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 0.0816, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 92.96, |
|
"learning_rate": 6.338709677419355e-05, |
|
"loss": 0.0878, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 92.99, |
|
"eval_accuracy": 0.8985343855693348, |
|
"eval_loss": 0.4215339124202728, |
|
"eval_runtime": 29.07, |
|
"eval_samples_per_second": 61.025, |
|
"eval_steps_per_second": 0.963, |
|
"step": 5812 |
|
}, |
|
{ |
|
"epoch": 93.12, |
|
"learning_rate": 6.17741935483871e-05, |
|
"loss": 0.0916, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 93.28, |
|
"learning_rate": 6.016129032258064e-05, |
|
"loss": 0.0768, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 93.44, |
|
"learning_rate": 5.854838709677419e-05, |
|
"loss": 0.088, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 93.6, |
|
"learning_rate": 5.6935483870967744e-05, |
|
"loss": 0.0733, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 93.76, |
|
"learning_rate": 5.532258064516129e-05, |
|
"loss": 0.1046, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 93.92, |
|
"learning_rate": 5.370967741935484e-05, |
|
"loss": 0.0882, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.8979706877113867, |
|
"eval_loss": 0.42621222138404846, |
|
"eval_runtime": 29.148, |
|
"eval_samples_per_second": 60.862, |
|
"eval_steps_per_second": 0.961, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 94.08, |
|
"learning_rate": 5.2096774193548385e-05, |
|
"loss": 0.0759, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 94.24, |
|
"learning_rate": 5.0483870967741936e-05, |
|
"loss": 0.105, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 94.4, |
|
"learning_rate": 4.887096774193549e-05, |
|
"loss": 0.0882, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 94.56, |
|
"learning_rate": 4.725806451612903e-05, |
|
"loss": 0.0826, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 94.72, |
|
"learning_rate": 4.5645161290322584e-05, |
|
"loss": 0.0725, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 94.88, |
|
"learning_rate": 4.403225806451613e-05, |
|
"loss": 0.0854, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 94.99, |
|
"eval_accuracy": 0.8974069898534386, |
|
"eval_loss": 0.4256262481212616, |
|
"eval_runtime": 28.9502, |
|
"eval_samples_per_second": 61.278, |
|
"eval_steps_per_second": 0.967, |
|
"step": 5937 |
|
}, |
|
{ |
|
"epoch": 95.04, |
|
"learning_rate": 4.241935483870968e-05, |
|
"loss": 0.0928, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 95.2, |
|
"learning_rate": 4.080645161290323e-05, |
|
"loss": 0.0837, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 95.36, |
|
"learning_rate": 3.9193548387096776e-05, |
|
"loss": 0.0878, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 95.52, |
|
"learning_rate": 3.758064516129033e-05, |
|
"loss": 0.0809, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 95.68, |
|
"learning_rate": 3.596774193548387e-05, |
|
"loss": 0.0779, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 95.84, |
|
"learning_rate": 3.435483870967742e-05, |
|
"loss": 0.0817, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 3.2741935483870974e-05, |
|
"loss": 0.0795, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.9007891770011274, |
|
"eval_loss": 0.42286553978919983, |
|
"eval_runtime": 28.8836, |
|
"eval_samples_per_second": 61.419, |
|
"eval_steps_per_second": 0.969, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 96.16, |
|
"learning_rate": 3.112903225806452e-05, |
|
"loss": 0.0956, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 96.32, |
|
"learning_rate": 2.9516129032258063e-05, |
|
"loss": 0.0795, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 96.48, |
|
"learning_rate": 2.7903225806451615e-05, |
|
"loss": 0.0884, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 96.64, |
|
"learning_rate": 2.6290322580645163e-05, |
|
"loss": 0.0963, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 96.8, |
|
"learning_rate": 2.467741935483871e-05, |
|
"loss": 0.0844, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 96.96, |
|
"learning_rate": 2.306451612903226e-05, |
|
"loss": 0.0931, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"eval_accuracy": 0.8990980834272829, |
|
"eval_loss": 0.42177480459213257, |
|
"eval_runtime": 28.9325, |
|
"eval_samples_per_second": 61.315, |
|
"eval_steps_per_second": 0.968, |
|
"step": 6062 |
|
}, |
|
{ |
|
"epoch": 97.12, |
|
"learning_rate": 2.1451612903225807e-05, |
|
"loss": 0.0921, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 97.28, |
|
"learning_rate": 1.9838709677419358e-05, |
|
"loss": 0.0805, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 97.44, |
|
"learning_rate": 1.8225806451612903e-05, |
|
"loss": 0.0815, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 97.6, |
|
"learning_rate": 1.661290322580645e-05, |
|
"loss": 0.0887, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 97.76, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0939, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 97.92, |
|
"learning_rate": 1.3387096774193548e-05, |
|
"loss": 0.0826, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.8985343855693348, |
|
"eval_loss": 0.42353904247283936, |
|
"eval_runtime": 29.0565, |
|
"eval_samples_per_second": 61.053, |
|
"eval_steps_per_second": 0.964, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 98.08, |
|
"learning_rate": 1.1774193548387096e-05, |
|
"loss": 0.0768, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 98.24, |
|
"learning_rate": 1.0161290322580644e-05, |
|
"loss": 0.0682, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 98.4, |
|
"learning_rate": 8.548387096774194e-06, |
|
"loss": 0.0767, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 98.56, |
|
"learning_rate": 6.935483870967742e-06, |
|
"loss": 0.081, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 98.72, |
|
"learning_rate": 5.32258064516129e-06, |
|
"loss": 0.0798, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 98.88, |
|
"learning_rate": 3.709677419354839e-06, |
|
"loss": 0.0926, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 98.99, |
|
"eval_accuracy": 0.8985343855693348, |
|
"eval_loss": 0.42374834418296814, |
|
"eval_runtime": 28.8905, |
|
"eval_samples_per_second": 61.404, |
|
"eval_steps_per_second": 0.969, |
|
"step": 6187 |
|
}, |
|
{ |
|
"epoch": 99.04, |
|
"learning_rate": 2.096774193548387e-06, |
|
"loss": 0.0832, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"learning_rate": 4.838709677419355e-07, |
|
"loss": 0.0829, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"eval_accuracy": 0.8985343855693348, |
|
"eval_loss": 0.4238373041152954, |
|
"eval_runtime": 29.1144, |
|
"eval_samples_per_second": 60.932, |
|
"eval_steps_per_second": 0.962, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"step": 6200, |
|
"total_flos": 1.2844070249593489e+20, |
|
"train_loss": 0.21447962582111357, |
|
"train_runtime": 32541.0222, |
|
"train_samples_per_second": 49.037, |
|
"train_steps_per_second": 0.191 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6200, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 1.2844070249593489e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|