|
{ |
|
"best_metric": 0.029699677601456642, |
|
"best_model_checkpoint": "./save/eng-zho_elderly_facebook/wav2vec2-large-xlsr-53/checkpoint-2496", |
|
"epoch": 22.0, |
|
"global_step": 4576, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.997596153846154e-05, |
|
"loss": 0.0533, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.995192307692308e-05, |
|
"loss": 0.0523, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.992788461538462e-05, |
|
"loss": 0.0517, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9906250000000004e-05, |
|
"loss": 0.0497, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.988221153846154e-05, |
|
"loss": 0.0471, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.985817307692308e-05, |
|
"loss": 0.0442, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.983413461538462e-05, |
|
"loss": 0.0416, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.9810096153846156e-05, |
|
"loss": 0.0393, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.97860576923077e-05, |
|
"loss": 0.0424, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.9762019230769235e-05, |
|
"loss": 0.0414, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.973798076923077e-05, |
|
"loss": 0.0394, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.9713942307692314e-05, |
|
"loss": 0.0389, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.968990384615385e-05, |
|
"loss": 0.0427, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.966586538461539e-05, |
|
"loss": 0.0421, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.964182692307692e-05, |
|
"loss": 0.0402, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.961778846153846e-05, |
|
"loss": 0.0412, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.959375e-05, |
|
"loss": 0.0404, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.956971153846154e-05, |
|
"loss": 0.0408, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.9545673076923075e-05, |
|
"loss": 0.0391, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.952163461538462e-05, |
|
"loss": 0.0378, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.03970440477132797, |
|
"eval_runtime": 37.2853, |
|
"eval_samples_per_second": 17.406, |
|
"eval_steps_per_second": 2.199, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.9497596153846155e-05, |
|
"loss": 0.0407, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.947355769230769e-05, |
|
"loss": 0.0404, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.9449519230769234e-05, |
|
"loss": 0.0406, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.942548076923077e-05, |
|
"loss": 0.0377, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.940144230769231e-05, |
|
"loss": 0.0401, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.937740384615385e-05, |
|
"loss": 0.0391, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.9353365384615386e-05, |
|
"loss": 0.042, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.932932692307692e-05, |
|
"loss": 0.0371, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.9305288461538466e-05, |
|
"loss": 0.0405, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.928125e-05, |
|
"loss": 0.038, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.925721153846154e-05, |
|
"loss": 0.0384, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.923317307692308e-05, |
|
"loss": 0.0398, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.920913461538462e-05, |
|
"loss": 0.037, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.9185096153846154e-05, |
|
"loss": 0.0389, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.91610576923077e-05, |
|
"loss": 0.037, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.913701923076923e-05, |
|
"loss": 0.0375, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.911298076923077e-05, |
|
"loss": 0.0362, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.908894230769231e-05, |
|
"loss": 0.0405, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.906490384615385e-05, |
|
"loss": 0.0374, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.9040865384615385e-05, |
|
"loss": 0.0374, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.901682692307693e-05, |
|
"loss": 0.036, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.037033889442682266, |
|
"eval_runtime": 36.6058, |
|
"eval_samples_per_second": 17.729, |
|
"eval_steps_per_second": 2.24, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.8992788461538465e-05, |
|
"loss": 0.0362, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.896875e-05, |
|
"loss": 0.0391, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.894471153846154e-05, |
|
"loss": 0.0361, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.892067307692308e-05, |
|
"loss": 0.0384, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 4.889663461538462e-05, |
|
"loss": 0.0387, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.887259615384615e-05, |
|
"loss": 0.0333, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.8848557692307696e-05, |
|
"loss": 0.0371, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.882451923076923e-05, |
|
"loss": 0.0351, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.880048076923077e-05, |
|
"loss": 0.0356, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.877644230769231e-05, |
|
"loss": 0.0355, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.875240384615385e-05, |
|
"loss": 0.0369, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.8728365384615385e-05, |
|
"loss": 0.0349, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.870432692307693e-05, |
|
"loss": 0.0358, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 4.8680288461538464e-05, |
|
"loss": 0.0344, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 4.865625e-05, |
|
"loss": 0.0341, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.863221153846154e-05, |
|
"loss": 0.0367, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.860817307692308e-05, |
|
"loss": 0.0345, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.8584134615384616e-05, |
|
"loss": 0.0365, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.856009615384616e-05, |
|
"loss": 0.0334, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.8536057692307695e-05, |
|
"loss": 0.0356, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.851201923076923e-05, |
|
"loss": 0.0322, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.034713711589574814, |
|
"eval_runtime": 36.4637, |
|
"eval_samples_per_second": 17.799, |
|
"eval_steps_per_second": 2.249, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.8487980769230775e-05, |
|
"loss": 0.0354, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 4.846394230769231e-05, |
|
"loss": 0.0315, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 4.843990384615385e-05, |
|
"loss": 0.036, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 4.841586538461539e-05, |
|
"loss": 0.0358, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 4.839182692307693e-05, |
|
"loss": 0.0336, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 4.836778846153846e-05, |
|
"loss": 0.0349, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.8343750000000006e-05, |
|
"loss": 0.0342, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 4.831971153846154e-05, |
|
"loss": 0.0404, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 4.829567307692308e-05, |
|
"loss": 0.0317, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 4.827163461538462e-05, |
|
"loss": 0.0362, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 4.824759615384616e-05, |
|
"loss": 0.03, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 4.8223557692307695e-05, |
|
"loss": 0.0311, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 4.819951923076924e-05, |
|
"loss": 0.033, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.8175480769230774e-05, |
|
"loss": 0.0372, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 4.815144230769231e-05, |
|
"loss": 0.0349, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 4.812740384615385e-05, |
|
"loss": 0.0329, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.810336538461538e-05, |
|
"loss": 0.0324, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 4.8079326923076926e-05, |
|
"loss": 0.0313, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 4.805528846153846e-05, |
|
"loss": 0.0321, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.803125e-05, |
|
"loss": 0.0365, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.800721153846154e-05, |
|
"loss": 0.0299, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.033535003662109375, |
|
"eval_runtime": 36.6636, |
|
"eval_samples_per_second": 17.701, |
|
"eval_steps_per_second": 2.237, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.798317307692308e-05, |
|
"loss": 0.0355, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 4.7959134615384614e-05, |
|
"loss": 0.0303, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 4.793509615384616e-05, |
|
"loss": 0.0326, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 4.7911057692307694e-05, |
|
"loss": 0.0308, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 4.788701923076923e-05, |
|
"loss": 0.0325, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 4.7862980769230766e-05, |
|
"loss": 0.0357, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 4.783894230769231e-05, |
|
"loss": 0.0337, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 4.7814903846153846e-05, |
|
"loss": 0.0343, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 4.779086538461538e-05, |
|
"loss": 0.0287, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 4.7766826923076925e-05, |
|
"loss": 0.0294, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 4.774278846153846e-05, |
|
"loss": 0.0289, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 4.771875e-05, |
|
"loss": 0.03, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 4.769471153846154e-05, |
|
"loss": 0.0337, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 4.767067307692308e-05, |
|
"loss": 0.0335, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 4.7646634615384613e-05, |
|
"loss": 0.028, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.7622596153846157e-05, |
|
"loss": 0.0304, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 4.759855769230769e-05, |
|
"loss": 0.0324, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 4.757451923076923e-05, |
|
"loss": 0.0282, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.755048076923077e-05, |
|
"loss": 0.0306, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 4.752644230769231e-05, |
|
"loss": 0.0304, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.7502403846153845e-05, |
|
"loss": 0.0323, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.032066214829683304, |
|
"eval_runtime": 36.0368, |
|
"eval_samples_per_second": 18.009, |
|
"eval_steps_per_second": 2.275, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 4.747836538461539e-05, |
|
"loss": 0.0298, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 4.7454326923076924e-05, |
|
"loss": 0.0317, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 4.743028846153846e-05, |
|
"loss": 0.0309, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 4.7406250000000004e-05, |
|
"loss": 0.0315, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 4.738221153846154e-05, |
|
"loss": 0.0279, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 4.7358173076923076e-05, |
|
"loss": 0.0287, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 4.733413461538462e-05, |
|
"loss": 0.0286, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 4.7310096153846156e-05, |
|
"loss": 0.0301, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 4.728605769230769e-05, |
|
"loss": 0.0345, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 4.7262019230769235e-05, |
|
"loss": 0.03, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 4.723798076923077e-05, |
|
"loss": 0.0263, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 4.721394230769231e-05, |
|
"loss": 0.0278, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 4.718990384615385e-05, |
|
"loss": 0.0281, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 4.716586538461539e-05, |
|
"loss": 0.0304, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 4.7141826923076924e-05, |
|
"loss": 0.0269, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 4.7117788461538467e-05, |
|
"loss": 0.0301, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 4.709375e-05, |
|
"loss": 0.0284, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 4.706971153846154e-05, |
|
"loss": 0.0292, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 4.704567307692308e-05, |
|
"loss": 0.0304, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 4.702163461538462e-05, |
|
"loss": 0.0328, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.030981002375483513, |
|
"eval_runtime": 36.8216, |
|
"eval_samples_per_second": 17.626, |
|
"eval_steps_per_second": 2.227, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.6997596153846155e-05, |
|
"loss": 0.0265, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 4.69735576923077e-05, |
|
"loss": 0.029, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 4.6949519230769234e-05, |
|
"loss": 0.0273, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 4.692548076923077e-05, |
|
"loss": 0.0275, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 4.6901442307692314e-05, |
|
"loss": 0.0283, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 4.687740384615385e-05, |
|
"loss": 0.025, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 4.6853365384615386e-05, |
|
"loss": 0.023, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 4.682932692307693e-05, |
|
"loss": 0.0275, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 4.6805288461538466e-05, |
|
"loss": 0.0286, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 4.678125e-05, |
|
"loss": 0.0267, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 4.6757211538461545e-05, |
|
"loss": 0.031, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 4.673317307692308e-05, |
|
"loss": 0.0267, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 4.670913461538462e-05, |
|
"loss": 0.0283, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 4.668509615384616e-05, |
|
"loss": 0.0266, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 4.66610576923077e-05, |
|
"loss": 0.0277, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 4.6637019230769234e-05, |
|
"loss": 0.0298, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 4.6612980769230777e-05, |
|
"loss": 0.0294, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 4.658894230769231e-05, |
|
"loss": 0.0253, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 4.656490384615385e-05, |
|
"loss": 0.0264, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 4.6540865384615386e-05, |
|
"loss": 0.0288, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 4.651682692307692e-05, |
|
"loss": 0.024, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.031017007306218147, |
|
"eval_runtime": 37.113, |
|
"eval_samples_per_second": 17.487, |
|
"eval_steps_per_second": 2.209, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 4.649278846153846e-05, |
|
"loss": 0.0238, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 4.646875e-05, |
|
"loss": 0.0284, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 4.644471153846154e-05, |
|
"loss": 0.0248, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 4.6420673076923074e-05, |
|
"loss": 0.0234, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 4.639663461538462e-05, |
|
"loss": 0.0271, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 4.637259615384615e-05, |
|
"loss": 0.0267, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 4.634855769230769e-05, |
|
"loss": 0.0252, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 4.632451923076923e-05, |
|
"loss": 0.0257, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 4.630048076923077e-05, |
|
"loss": 0.0224, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 4.6276442307692305e-05, |
|
"loss": 0.0277, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 4.625240384615385e-05, |
|
"loss": 0.0297, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 4.6228365384615385e-05, |
|
"loss": 0.0254, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 4.620432692307692e-05, |
|
"loss": 0.0238, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 4.6180288461538464e-05, |
|
"loss": 0.0243, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 4.615625e-05, |
|
"loss": 0.0262, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.613221153846154e-05, |
|
"loss": 0.0308, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 4.610817307692308e-05, |
|
"loss": 0.022, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 4.6084134615384616e-05, |
|
"loss": 0.0236, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 4.606009615384615e-05, |
|
"loss": 0.0251, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 4.6036057692307696e-05, |
|
"loss": 0.0261, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 4.601201923076923e-05, |
|
"loss": 0.024, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.030306896194815636, |
|
"eval_runtime": 37.1939, |
|
"eval_samples_per_second": 17.449, |
|
"eval_steps_per_second": 2.205, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 4.598798076923077e-05, |
|
"loss": 0.0273, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 4.596394230769231e-05, |
|
"loss": 0.0256, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 4.593990384615385e-05, |
|
"loss": 0.0246, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 4.5915865384615384e-05, |
|
"loss": 0.0216, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 4.589182692307693e-05, |
|
"loss": 0.0237, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 4.586778846153846e-05, |
|
"loss": 0.0205, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 4.584375e-05, |
|
"loss": 0.0221, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 4.581971153846154e-05, |
|
"loss": 0.0282, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 4.579567307692308e-05, |
|
"loss": 0.0283, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 4.5771634615384615e-05, |
|
"loss": 0.0214, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 4.574759615384616e-05, |
|
"loss": 0.0267, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 4.5723557692307695e-05, |
|
"loss": 0.02, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 4.569951923076923e-05, |
|
"loss": 0.0234, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 4.5675480769230774e-05, |
|
"loss": 0.0269, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 4.565144230769231e-05, |
|
"loss": 0.0255, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 4.562740384615385e-05, |
|
"loss": 0.0256, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 4.560336538461539e-05, |
|
"loss": 0.0227, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 4.5579326923076926e-05, |
|
"loss": 0.0233, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 4.555528846153846e-05, |
|
"loss": 0.0235, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 4.5531250000000006e-05, |
|
"loss": 0.029, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 4.550721153846154e-05, |
|
"loss": 0.0252, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.030633406713604927, |
|
"eval_runtime": 37.8394, |
|
"eval_samples_per_second": 17.151, |
|
"eval_steps_per_second": 2.167, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 4.548317307692308e-05, |
|
"loss": 0.0241, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.545913461538462e-05, |
|
"loss": 0.0207, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 4.543509615384616e-05, |
|
"loss": 0.0219, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 4.5411057692307694e-05, |
|
"loss": 0.0226, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 4.538701923076924e-05, |
|
"loss": 0.0205, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 4.536298076923077e-05, |
|
"loss": 0.0209, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 4.533894230769231e-05, |
|
"loss": 0.0232, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 4.531490384615385e-05, |
|
"loss": 0.0241, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 4.529086538461539e-05, |
|
"loss": 0.022, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 4.5266826923076925e-05, |
|
"loss": 0.0231, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 4.524278846153847e-05, |
|
"loss": 0.0242, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 4.5218750000000005e-05, |
|
"loss": 0.0247, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 4.519471153846154e-05, |
|
"loss": 0.0225, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 4.5170673076923084e-05, |
|
"loss": 0.032, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 4.514663461538462e-05, |
|
"loss": 0.022, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 4.512259615384616e-05, |
|
"loss": 0.0184, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 4.50985576923077e-05, |
|
"loss": 0.0235, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 4.5074519230769236e-05, |
|
"loss": 0.0215, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 4.505048076923077e-05, |
|
"loss": 0.0241, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 4.5026442307692316e-05, |
|
"loss": 0.0193, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.500240384615385e-05, |
|
"loss": 0.0203, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.030458811670541763, |
|
"eval_runtime": 36.2671, |
|
"eval_samples_per_second": 17.895, |
|
"eval_steps_per_second": 2.261, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 4.497836538461538e-05, |
|
"loss": 0.0231, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 4.4954326923076925e-05, |
|
"loss": 0.0237, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 4.493028846153846e-05, |
|
"loss": 0.0193, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 4.490625e-05, |
|
"loss": 0.0202, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 4.488221153846154e-05, |
|
"loss": 0.0246, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 4.4858173076923077e-05, |
|
"loss": 0.0245, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 4.483413461538461e-05, |
|
"loss": 0.02, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 4.4810096153846156e-05, |
|
"loss": 0.0224, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 4.478605769230769e-05, |
|
"loss": 0.0209, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 4.476201923076923e-05, |
|
"loss": 0.02, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 4.473798076923077e-05, |
|
"loss": 0.0233, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 4.471634615384616e-05, |
|
"loss": 0.0222, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 4.4692307692307693e-05, |
|
"loss": 0.0233, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 4.466826923076923e-05, |
|
"loss": 0.0216, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 4.464423076923077e-05, |
|
"loss": 0.0239, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 4.462019230769231e-05, |
|
"loss": 0.019, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 4.4596153846153845e-05, |
|
"loss": 0.023, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 4.457211538461539e-05, |
|
"loss": 0.0218, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 4.4548076923076925e-05, |
|
"loss": 0.0223, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 4.452403846153846e-05, |
|
"loss": 0.0175, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.030906477943062782, |
|
"eval_runtime": 36.4911, |
|
"eval_samples_per_second": 17.785, |
|
"eval_steps_per_second": 2.247, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"loss": 0.0194, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 4.447596153846154e-05, |
|
"loss": 0.0205, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 4.445192307692308e-05, |
|
"loss": 0.0234, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 4.442788461538462e-05, |
|
"loss": 0.0202, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.4403846153846156e-05, |
|
"loss": 0.0191, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 4.437980769230769e-05, |
|
"loss": 0.0176, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.4355769230769236e-05, |
|
"loss": 0.0175, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 4.433173076923077e-05, |
|
"loss": 0.0194, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"learning_rate": 4.430769230769231e-05, |
|
"loss": 0.0177, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 4.428365384615385e-05, |
|
"loss": 0.0182, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 4.425961538461539e-05, |
|
"loss": 0.0225, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 4.4235576923076924e-05, |
|
"loss": 0.0189, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 4.421153846153847e-05, |
|
"loss": 0.0183, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 4.4187500000000003e-05, |
|
"loss": 0.0233, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 4.416346153846154e-05, |
|
"loss": 0.0208, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 4.413942307692308e-05, |
|
"loss": 0.0189, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 4.411538461538462e-05, |
|
"loss": 0.0201, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 4.4091346153846155e-05, |
|
"loss": 0.0223, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 4.40673076923077e-05, |
|
"loss": 0.0216, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"learning_rate": 4.4043269230769235e-05, |
|
"loss": 0.0187, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 4.4021634615384613e-05, |
|
"loss": 0.0243, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.029699677601456642, |
|
"eval_runtime": 36.7497, |
|
"eval_samples_per_second": 17.66, |
|
"eval_steps_per_second": 2.231, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 4.3997596153846157e-05, |
|
"loss": 0.02, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 4.397355769230769e-05, |
|
"loss": 0.0165, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 4.394951923076923e-05, |
|
"loss": 0.0175, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 4.392548076923077e-05, |
|
"loss": 0.0178, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 4.390144230769231e-05, |
|
"loss": 0.0136, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 4.3877403846153845e-05, |
|
"loss": 0.0174, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 4.385336538461539e-05, |
|
"loss": 0.0209, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 4.3829326923076924e-05, |
|
"loss": 0.0191, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 4.380528846153846e-05, |
|
"loss": 0.0236, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 4.3781250000000004e-05, |
|
"loss": 0.0187, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 4.375721153846154e-05, |
|
"loss": 0.0182, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 4.3733173076923076e-05, |
|
"loss": 0.0142, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 4.370913461538462e-05, |
|
"loss": 0.0159, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 4.3685096153846156e-05, |
|
"loss": 0.0178, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 4.366105769230769e-05, |
|
"loss": 0.018, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 4.3637019230769235e-05, |
|
"loss": 0.0208, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 4.361298076923077e-05, |
|
"loss": 0.0181, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"learning_rate": 4.358894230769231e-05, |
|
"loss": 0.0179, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 4.356490384615385e-05, |
|
"loss": 0.019, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 4.354086538461539e-05, |
|
"loss": 0.0203, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 4.3516826923076923e-05, |
|
"loss": 0.0197, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.030876118689775467, |
|
"eval_runtime": 36.3232, |
|
"eval_samples_per_second": 17.867, |
|
"eval_steps_per_second": 2.258, |
|
"step": 2704 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 4.3492788461538467e-05, |
|
"loss": 0.0169, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 4.346875e-05, |
|
"loss": 0.0151, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 4.344471153846154e-05, |
|
"loss": 0.0147, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 4.342067307692308e-05, |
|
"loss": 0.0173, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 4.339663461538462e-05, |
|
"loss": 0.0183, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"learning_rate": 4.3372596153846155e-05, |
|
"loss": 0.0158, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 4.33485576923077e-05, |
|
"loss": 0.0146, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 4.3324519230769234e-05, |
|
"loss": 0.0236, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 4.330048076923077e-05, |
|
"loss": 0.0154, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 4.3276442307692314e-05, |
|
"loss": 0.0166, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 4.325240384615385e-05, |
|
"loss": 0.018, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 4.3228365384615386e-05, |
|
"loss": 0.0166, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 4.320432692307693e-05, |
|
"loss": 0.0177, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 4.3180288461538466e-05, |
|
"loss": 0.0201, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 4.315625e-05, |
|
"loss": 0.02, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 4.3132211538461545e-05, |
|
"loss": 0.0243, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 4.310817307692308e-05, |
|
"loss": 0.0198, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 4.308413461538462e-05, |
|
"loss": 0.0182, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 4.306009615384616e-05, |
|
"loss": 0.0168, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 4.30360576923077e-05, |
|
"loss": 0.0161, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 4.3012019230769233e-05, |
|
"loss": 0.0154, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.031427882611751556, |
|
"eval_runtime": 36.8364, |
|
"eval_samples_per_second": 17.618, |
|
"eval_steps_per_second": 2.226, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 4.2987980769230777e-05, |
|
"loss": 0.0116, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 4.296394230769231e-05, |
|
"loss": 0.0126, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 4.293990384615385e-05, |
|
"loss": 0.0153, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 4.291586538461539e-05, |
|
"loss": 0.0166, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 4.289182692307693e-05, |
|
"loss": 0.0139, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 4.2867788461538465e-05, |
|
"loss": 0.0149, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 4.284375000000001e-05, |
|
"loss": 0.0205, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 4.2819711538461544e-05, |
|
"loss": 0.0156, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 4.2795673076923074e-05, |
|
"loss": 0.0158, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 4.277163461538462e-05, |
|
"loss": 0.0158, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 4.274759615384615e-05, |
|
"loss": 0.0208, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 4.272355769230769e-05, |
|
"loss": 0.0187, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 4.269951923076923e-05, |
|
"loss": 0.016, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 4.267548076923077e-05, |
|
"loss": 0.0163, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 4.2651442307692305e-05, |
|
"loss": 0.0152, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 14.76, |
|
"learning_rate": 4.262740384615385e-05, |
|
"loss": 0.016, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 4.2603365384615385e-05, |
|
"loss": 0.0124, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"learning_rate": 4.257932692307692e-05, |
|
"loss": 0.0159, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 4.2555288461538464e-05, |
|
"loss": 0.0187, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 4.253125e-05, |
|
"loss": 0.0201, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.250721153846154e-05, |
|
"loss": 0.0123, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.03161365166306496, |
|
"eval_runtime": 36.679, |
|
"eval_samples_per_second": 17.694, |
|
"eval_steps_per_second": 2.236, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 4.248317307692308e-05, |
|
"loss": 0.0139, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 4.2459134615384616e-05, |
|
"loss": 0.0143, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 4.243509615384615e-05, |
|
"loss": 0.0156, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 4.2411057692307696e-05, |
|
"loss": 0.0134, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 4.238701923076923e-05, |
|
"loss": 0.0168, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 4.236298076923077e-05, |
|
"loss": 0.0119, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 15.34, |
|
"learning_rate": 4.233894230769231e-05, |
|
"loss": 0.0135, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 4.231490384615385e-05, |
|
"loss": 0.0159, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 4.2290865384615384e-05, |
|
"loss": 0.0122, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 4.226682692307693e-05, |
|
"loss": 0.0167, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 4.224278846153846e-05, |
|
"loss": 0.0142, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 4.221875e-05, |
|
"loss": 0.0159, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 4.219471153846154e-05, |
|
"loss": 0.0139, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 4.217067307692308e-05, |
|
"loss": 0.0111, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 4.2146634615384615e-05, |
|
"loss": 0.0116, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 15.77, |
|
"learning_rate": 4.212259615384616e-05, |
|
"loss": 0.0171, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 4.2098557692307695e-05, |
|
"loss": 0.0125, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 4.207451923076923e-05, |
|
"loss": 0.0187, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 4.2050480769230774e-05, |
|
"loss": 0.0113, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"learning_rate": 4.202644230769231e-05, |
|
"loss": 0.0128, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.03363659605383873, |
|
"eval_runtime": 36.1996, |
|
"eval_samples_per_second": 17.928, |
|
"eval_steps_per_second": 2.265, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 4.200240384615385e-05, |
|
"loss": 0.0111, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 4.197836538461539e-05, |
|
"loss": 0.0129, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 4.1954326923076926e-05, |
|
"loss": 0.0149, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 4.193028846153846e-05, |
|
"loss": 0.0091, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 4.1906250000000006e-05, |
|
"loss": 0.0126, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 4.188221153846154e-05, |
|
"loss": 0.0103, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 4.185817307692308e-05, |
|
"loss": 0.0146, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 4.183413461538462e-05, |
|
"loss": 0.0134, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"learning_rate": 4.181009615384616e-05, |
|
"loss": 0.0124, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 4.1786057692307694e-05, |
|
"loss": 0.0127, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 4.176201923076924e-05, |
|
"loss": 0.0165, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 4.173798076923077e-05, |
|
"loss": 0.0154, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 16.59, |
|
"learning_rate": 4.171394230769231e-05, |
|
"loss": 0.0126, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"learning_rate": 4.168990384615385e-05, |
|
"loss": 0.013, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 16.68, |
|
"learning_rate": 4.166586538461539e-05, |
|
"loss": 0.0146, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 4.1641826923076925e-05, |
|
"loss": 0.0128, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"learning_rate": 4.161778846153846e-05, |
|
"loss": 0.0119, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 4.1593750000000005e-05, |
|
"loss": 0.0114, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"learning_rate": 4.156971153846154e-05, |
|
"loss": 0.0125, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 16.92, |
|
"learning_rate": 4.154567307692308e-05, |
|
"loss": 0.016, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 4.152163461538462e-05, |
|
"loss": 0.0136, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.03450627624988556, |
|
"eval_runtime": 36.2674, |
|
"eval_samples_per_second": 17.895, |
|
"eval_steps_per_second": 2.261, |
|
"step": 3536 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 4.149759615384616e-05, |
|
"loss": 0.02, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 4.147355769230769e-05, |
|
"loss": 0.012, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"learning_rate": 4.1449519230769236e-05, |
|
"loss": 0.0096, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 4.142548076923077e-05, |
|
"loss": 0.0141, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 4.140144230769231e-05, |
|
"loss": 0.0109, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 17.26, |
|
"learning_rate": 4.137740384615385e-05, |
|
"loss": 0.013, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 4.135336538461539e-05, |
|
"loss": 0.0121, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 17.36, |
|
"learning_rate": 4.1329326923076925e-05, |
|
"loss": 0.0144, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"learning_rate": 4.130528846153847e-05, |
|
"loss": 0.0122, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 4.1281250000000004e-05, |
|
"loss": 0.0151, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 4.125721153846154e-05, |
|
"loss": 0.0109, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 4.1233173076923077e-05, |
|
"loss": 0.0117, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 4.120913461538461e-05, |
|
"loss": 0.0118, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 17.64, |
|
"learning_rate": 4.1185096153846156e-05, |
|
"loss": 0.0117, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 17.69, |
|
"learning_rate": 4.116105769230769e-05, |
|
"loss": 0.0156, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"learning_rate": 4.113701923076923e-05, |
|
"loss": 0.0168, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"learning_rate": 4.111298076923077e-05, |
|
"loss": 0.0141, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 4.108894230769231e-05, |
|
"loss": 0.0131, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 17.88, |
|
"learning_rate": 4.1064903846153844e-05, |
|
"loss": 0.011, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 4.104086538461539e-05, |
|
"loss": 0.0113, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"learning_rate": 4.1016826923076924e-05, |
|
"loss": 0.0124, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.036256324499845505, |
|
"eval_runtime": 37.0725, |
|
"eval_samples_per_second": 17.506, |
|
"eval_steps_per_second": 2.212, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 4.099278846153846e-05, |
|
"loss": 0.0102, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"learning_rate": 4.096875e-05, |
|
"loss": 0.0132, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"learning_rate": 4.094471153846154e-05, |
|
"loss": 0.0107, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 18.17, |
|
"learning_rate": 4.0920673076923076e-05, |
|
"loss": 0.01, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 4.089663461538462e-05, |
|
"loss": 0.0119, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 18.27, |
|
"learning_rate": 4.0872596153846155e-05, |
|
"loss": 0.0115, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 4.084855769230769e-05, |
|
"loss": 0.0111, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 4.0824519230769235e-05, |
|
"loss": 0.013, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 4.080048076923077e-05, |
|
"loss": 0.0093, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 4.077644230769231e-05, |
|
"loss": 0.0108, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 4.075240384615385e-05, |
|
"loss": 0.0111, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 4.0728365384615387e-05, |
|
"loss": 0.0093, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 4.070432692307692e-05, |
|
"loss": 0.0111, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"learning_rate": 4.0680288461538466e-05, |
|
"loss": 0.0124, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 4.065625e-05, |
|
"loss": 0.0126, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 4.063221153846154e-05, |
|
"loss": 0.0137, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 4.060817307692308e-05, |
|
"loss": 0.013, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"learning_rate": 4.058413461538462e-05, |
|
"loss": 0.0106, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 4.0562500000000003e-05, |
|
"loss": 0.0139, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 4.053846153846154e-05, |
|
"loss": 0.0144, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 4.051442307692308e-05, |
|
"loss": 0.0124, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.03690133988857269, |
|
"eval_runtime": 36.3716, |
|
"eval_samples_per_second": 17.844, |
|
"eval_steps_per_second": 2.255, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 4.049038461538462e-05, |
|
"loss": 0.0086, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 19.09, |
|
"learning_rate": 4.0466346153846155e-05, |
|
"loss": 0.0082, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 4.04423076923077e-05, |
|
"loss": 0.013, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"learning_rate": 4.0418269230769235e-05, |
|
"loss": 0.0104, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 4.039423076923077e-05, |
|
"loss": 0.0083, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.28, |
|
"learning_rate": 4.0370192307692314e-05, |
|
"loss": 0.0122, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 4.034615384615385e-05, |
|
"loss": 0.0082, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"learning_rate": 4.032211538461539e-05, |
|
"loss": 0.0107, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 19.42, |
|
"learning_rate": 4.029807692307693e-05, |
|
"loss": 0.0063, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 4.0274038461538466e-05, |
|
"loss": 0.0081, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 4.025e-05, |
|
"loss": 0.0078, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 4.0225961538461546e-05, |
|
"loss": 0.0159, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 19.62, |
|
"learning_rate": 4.020192307692308e-05, |
|
"loss": 0.0099, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"learning_rate": 4.017788461538462e-05, |
|
"loss": 0.0128, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 4.0153846153846155e-05, |
|
"loss": 0.0109, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"learning_rate": 4.012980769230769e-05, |
|
"loss": 0.0122, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 4.010576923076923e-05, |
|
"loss": 0.01, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 19.86, |
|
"learning_rate": 4.008173076923077e-05, |
|
"loss": 0.0146, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 19.9, |
|
"learning_rate": 4.005769230769231e-05, |
|
"loss": 0.0099, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"learning_rate": 4.003365384615384e-05, |
|
"loss": 0.0096, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 4.0009615384615386e-05, |
|
"loss": 0.0151, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.03828124701976776, |
|
"eval_runtime": 36.5852, |
|
"eval_samples_per_second": 17.739, |
|
"eval_steps_per_second": 2.241, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 20.05, |
|
"learning_rate": 3.998557692307692e-05, |
|
"loss": 0.0074, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 20.1, |
|
"learning_rate": 3.996153846153846e-05, |
|
"loss": 0.0118, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 20.14, |
|
"learning_rate": 3.99375e-05, |
|
"loss": 0.0101, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 20.19, |
|
"learning_rate": 3.991346153846154e-05, |
|
"loss": 0.0089, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 20.24, |
|
"learning_rate": 3.9889423076923074e-05, |
|
"loss": 0.0102, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 20.29, |
|
"learning_rate": 3.986538461538462e-05, |
|
"loss": 0.0116, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"learning_rate": 3.9841346153846154e-05, |
|
"loss": 0.0092, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 20.38, |
|
"learning_rate": 3.981730769230769e-05, |
|
"loss": 0.0083, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 3.979326923076923e-05, |
|
"loss": 0.0107, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 20.48, |
|
"learning_rate": 3.976923076923077e-05, |
|
"loss": 0.0078, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 20.53, |
|
"learning_rate": 3.9745192307692306e-05, |
|
"loss": 0.0101, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 20.58, |
|
"learning_rate": 3.972115384615385e-05, |
|
"loss": 0.0075, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"learning_rate": 3.9697115384615385e-05, |
|
"loss": 0.0103, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"learning_rate": 3.967307692307692e-05, |
|
"loss": 0.012, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 20.72, |
|
"learning_rate": 3.9649038461538465e-05, |
|
"loss": 0.0123, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 20.77, |
|
"learning_rate": 3.9625e-05, |
|
"loss": 0.007, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"learning_rate": 3.960096153846154e-05, |
|
"loss": 0.009, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"learning_rate": 3.957692307692308e-05, |
|
"loss": 0.0071, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 3.955288461538462e-05, |
|
"loss": 0.0079, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 3.952884615384615e-05, |
|
"loss": 0.0097, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.04113055393099785, |
|
"eval_runtime": 36.7735, |
|
"eval_samples_per_second": 17.649, |
|
"eval_steps_per_second": 2.23, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 3.9504807692307696e-05, |
|
"loss": 0.0104, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 21.06, |
|
"learning_rate": 3.948076923076923e-05, |
|
"loss": 0.0086, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 21.11, |
|
"learning_rate": 3.945673076923077e-05, |
|
"loss": 0.0127, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"learning_rate": 3.943269230769231e-05, |
|
"loss": 0.008, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 3.940865384615385e-05, |
|
"loss": 0.0073, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 21.25, |
|
"learning_rate": 3.9384615384615384e-05, |
|
"loss": 0.0087, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 21.3, |
|
"learning_rate": 3.936057692307693e-05, |
|
"loss": 0.0089, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 21.35, |
|
"learning_rate": 3.9336538461538464e-05, |
|
"loss": 0.0097, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 21.39, |
|
"learning_rate": 3.93125e-05, |
|
"loss": 0.0054, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 21.44, |
|
"learning_rate": 3.928846153846154e-05, |
|
"loss": 0.0079, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 21.49, |
|
"learning_rate": 3.926442307692308e-05, |
|
"loss": 0.009, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"learning_rate": 3.9240384615384616e-05, |
|
"loss": 0.01, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 21.59, |
|
"learning_rate": 3.921634615384616e-05, |
|
"loss": 0.0085, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"learning_rate": 3.9192307692307695e-05, |
|
"loss": 0.0094, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"learning_rate": 3.916826923076923e-05, |
|
"loss": 0.0128, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 21.73, |
|
"learning_rate": 3.9144230769230775e-05, |
|
"loss": 0.0077, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 21.78, |
|
"learning_rate": 3.912019230769231e-05, |
|
"loss": 0.0073, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 21.83, |
|
"learning_rate": 3.909615384615385e-05, |
|
"loss": 0.0083, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"learning_rate": 3.907211538461539e-05, |
|
"loss": 0.0112, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 21.92, |
|
"learning_rate": 3.904807692307693e-05, |
|
"loss": 0.0076, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"learning_rate": 3.902403846153846e-05, |
|
"loss": 0.0084, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.03989782929420471, |
|
"eval_runtime": 36.902, |
|
"eval_samples_per_second": 17.587, |
|
"eval_steps_per_second": 2.222, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"step": 4576, |
|
"total_flos": 5.233965363865186e+18, |
|
"train_loss": 0.02212856354686868, |
|
"train_runtime": 7388.5591, |
|
"train_samples_per_second": 22.521, |
|
"train_steps_per_second": 2.815 |
|
} |
|
], |
|
"max_steps": 20800, |
|
"num_train_epochs": 100, |
|
"total_flos": 5.233965363865186e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|