{
  "best_metric": 0.6401488780975342,
  "best_model_checkpoint": "./exper1_mesum5/checkpoint-1700",
  "epoch": 4.0,
  "global_step": 1720,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.02, "learning_rate": 0.0001988372093023256, "loss": 4.9469, "step": 10 },
    { "epoch": 0.05, "learning_rate": 0.00019767441860465116, "loss": 4.8606, "step": 20 },
    { "epoch": 0.07, "learning_rate": 0.00019651162790697676, "loss": 4.6917, "step": 30 },
    { "epoch": 0.09, "learning_rate": 0.00019534883720930232, "loss": 4.5534, "step": 40 },
    { "epoch": 0.12, "learning_rate": 0.0001941860465116279, "loss": 4.4467, "step": 50 },
    { "epoch": 0.14, "learning_rate": 0.0001930232558139535, "loss": 4.2795, "step": 60 },
    { "epoch": 0.16, "learning_rate": 0.0001918604651162791, "loss": 4.2333, "step": 70 },
    { "epoch": 0.19, "learning_rate": 0.00019069767441860466, "loss": 4.099, "step": 80 },
    { "epoch": 0.21, "learning_rate": 0.00018953488372093025, "loss": 3.9539, "step": 90 },
    { "epoch": 0.23, "learning_rate": 0.00018837209302325584, "loss": 3.9352, "step": 100 },
    { "epoch": 0.23, "eval_accuracy": 0.19585798816568048, "eval_loss": 3.8550209999084473, "eval_runtime": 19.1544, "eval_samples_per_second": 88.23, "eval_steps_per_second": 11.068, "step": 100 },
    { "epoch": 0.26, "learning_rate": 0.0001872093023255814, "loss": 3.8303, "step": 110 },
    { "epoch": 0.28, "learning_rate": 0.000186046511627907, "loss": 3.7242, "step": 120 },
    { "epoch": 0.3, "learning_rate": 0.00018488372093023256, "loss": 3.5167, "step": 130 },
    { "epoch": 0.33, "learning_rate": 0.00018372093023255815, "loss": 3.57, "step": 140 },
    { "epoch": 0.35, "learning_rate": 0.0001825581395348837, "loss": 3.5231, "step": 150 },
    { "epoch": 0.37, "learning_rate": 0.0001813953488372093, "loss": 3.4206, "step": 160 },
    { "epoch": 0.4, "learning_rate": 0.0001802325581395349, "loss": 3.4456, "step": 170 },
    { "epoch": 0.42, "learning_rate": 0.00017906976744186048, "loss": 3.3114, "step": 180 },
    { "epoch": 0.44, "learning_rate": 0.00017790697674418605, "loss": 3.3379, "step": 190 },
    { "epoch": 0.47, "learning_rate": 0.00017674418604651164, "loss": 3.1536, "step": 200 },
    { "epoch": 0.47, "eval_accuracy": 0.28875739644970416, "eval_loss": 3.175450086593628, "eval_runtime": 21.4845, "eval_samples_per_second": 78.662, "eval_steps_per_second": 9.868, "step": 200 },
    { "epoch": 0.49, "learning_rate": 0.00017558139534883723, "loss": 3.0307, "step": 210 },
    { "epoch": 0.51, "learning_rate": 0.0001744186046511628, "loss": 3.0889, "step": 220 },
    { "epoch": 0.53, "learning_rate": 0.00017325581395348838, "loss": 3.0574, "step": 230 },
    { "epoch": 0.56, "learning_rate": 0.00017209302325581395, "loss": 3.1058, "step": 240 },
    { "epoch": 0.58, "learning_rate": 0.00017093023255813954, "loss": 2.9981, "step": 250 },
    { "epoch": 0.6, "learning_rate": 0.0001697674418604651, "loss": 2.8662, "step": 260 },
    { "epoch": 0.63, "learning_rate": 0.00016860465116279072, "loss": 2.7418, "step": 270 },
    { "epoch": 0.65, "learning_rate": 0.00016744186046511629, "loss": 2.9494, "step": 280 },
    { "epoch": 0.67, "learning_rate": 0.00016639534883720932, "loss": 2.7704, "step": 290 },
    { "epoch": 0.7, "learning_rate": 0.00016523255813953488, "loss": 2.6937, "step": 300 },
    { "epoch": 0.7, "eval_accuracy": 0.4272189349112426, "eval_loss": 2.633167266845703, "eval_runtime": 21.2397, "eval_samples_per_second": 79.568, "eval_steps_per_second": 9.981, "step": 300 },
    { "epoch": 0.72, "learning_rate": 0.00016406976744186047, "loss": 2.6292, "step": 310 },
    { "epoch": 0.74, "learning_rate": 0.00016290697674418604, "loss": 2.641, "step": 320 },
    { "epoch": 0.77, "learning_rate": 0.00016174418604651163, "loss": 2.6534, "step": 330 },
    { "epoch": 0.79, "learning_rate": 0.00016058139534883722, "loss": 2.5655, "step": 340 },
    { "epoch": 0.81, "learning_rate": 0.0001594186046511628, "loss": 2.4129, "step": 350 },
    { "epoch": 0.84, "learning_rate": 0.00015825581395348837, "loss": 2.3514, "step": 360 },
    { "epoch": 0.86, "learning_rate": 0.00015709302325581396, "loss": 2.4527, "step": 370 },
    { "epoch": 0.88, "learning_rate": 0.00015593023255813955, "loss": 2.4411, "step": 380 },
    { "epoch": 0.91, "learning_rate": 0.00015476744186046512, "loss": 2.2639, "step": 390 },
    { "epoch": 0.93, "learning_rate": 0.0001536046511627907, "loss": 2.3748, "step": 400 },
    { "epoch": 0.93, "eval_accuracy": 0.4970414201183432, "eval_loss": 2.28334641456604, "eval_runtime": 21.0937, "eval_samples_per_second": 80.119, "eval_steps_per_second": 10.05, "step": 400 },
    { "epoch": 0.95, "learning_rate": 0.00015244186046511627, "loss": 2.3598, "step": 410 },
    { "epoch": 0.98, "learning_rate": 0.00015127906976744186, "loss": 2.3091, "step": 420 },
    { "epoch": 1.0, "learning_rate": 0.00015011627906976743, "loss": 2.1695, "step": 430 },
    { "epoch": 1.02, "learning_rate": 0.00014895348837209302, "loss": 1.951, "step": 440 },
    { "epoch": 1.05, "learning_rate": 0.0001477906976744186, "loss": 1.8001, "step": 450 },
    { "epoch": 1.07, "learning_rate": 0.0001466279069767442, "loss": 1.7164, "step": 460 },
    { "epoch": 1.09, "learning_rate": 0.0001454651162790698, "loss": 1.8812, "step": 470 },
    { "epoch": 1.12, "learning_rate": 0.00014430232558139536, "loss": 1.7576, "step": 480 },
    { "epoch": 1.14, "learning_rate": 0.00014313953488372095, "loss": 1.7459, "step": 490 },
    { "epoch": 1.16, "learning_rate": 0.0001419767441860465, "loss": 1.5575, "step": 500 },
    { "epoch": 1.16, "eval_accuracy": 0.5887573964497042, "eval_loss": 1.8712348937988281, "eval_runtime": 21.4456, "eval_samples_per_second": 78.804, "eval_steps_per_second": 9.885, "step": 500 },
    { "epoch": 1.19, "learning_rate": 0.0001408139534883721, "loss": 1.6987, "step": 510 },
    { "epoch": 1.21, "learning_rate": 0.00013965116279069767, "loss": 1.6239, "step": 520 },
    { "epoch": 1.23, "learning_rate": 0.00013848837209302326, "loss": 1.656, "step": 530 },
    { "epoch": 1.26, "learning_rate": 0.00013732558139534885, "loss": 1.43, "step": 540 },
    { "epoch": 1.28, "learning_rate": 0.00013616279069767444, "loss": 1.5609, "step": 550 },
    { "epoch": 1.3, "learning_rate": 0.00013500000000000003, "loss": 1.5339, "step": 560 },
    { "epoch": 1.33, "learning_rate": 0.0001338372093023256, "loss": 1.4529, "step": 570 },
    { "epoch": 1.35, "learning_rate": 0.00013267441860465118, "loss": 1.453, "step": 580 },
    { "epoch": 1.37, "learning_rate": 0.00013151162790697675, "loss": 1.4002, "step": 590 },
    { "epoch": 1.4, "learning_rate": 0.00013034883720930234, "loss": 1.4063, "step": 600 },
    { "epoch": 1.4, "eval_accuracy": 0.6313609467455621, "eval_loss": 1.6047611236572266, "eval_runtime": 21.4699, "eval_samples_per_second": 78.715, "eval_steps_per_second": 9.874, "step": 600 },
    { "epoch": 1.42, "learning_rate": 0.0001291860465116279, "loss": 1.4063, "step": 610 },
    { "epoch": 1.44, "learning_rate": 0.0001280232558139535, "loss": 1.3656, "step": 620 },
    { "epoch": 1.47, "learning_rate": 0.00012686046511627906, "loss": 1.2191, "step": 630 },
    { "epoch": 1.49, "learning_rate": 0.00012569767441860465, "loss": 1.3991, "step": 640 },
    { "epoch": 1.51, "learning_rate": 0.00012453488372093024, "loss": 1.2237, "step": 650 },
    { "epoch": 1.53, "learning_rate": 0.00012337209302325583, "loss": 1.1939, "step": 660 },
    { "epoch": 1.56, "learning_rate": 0.00012220930232558142, "loss": 1.2683, "step": 670 },
    { "epoch": 1.58, "learning_rate": 0.00012104651162790698, "loss": 1.2969, "step": 680 },
    { "epoch": 1.6, "learning_rate": 0.00011988372093023256, "loss": 1.1475, "step": 690 },
    { "epoch": 1.63, "learning_rate": 0.00011872093023255815, "loss": 1.1841, "step": 700 },
    { "epoch": 1.63, "eval_accuracy": 0.6621301775147929, "eval_loss": 1.4109498262405396, "eval_runtime": 19.5984, "eval_samples_per_second": 86.231, "eval_steps_per_second": 10.817, "step": 700 },
    { "epoch": 1.65, "learning_rate": 0.00011755813953488373, "loss": 1.2236, "step": 710 },
    { "epoch": 1.67, "learning_rate": 0.00011639534883720931, "loss": 1.3392, "step": 720 },
    { "epoch": 1.7, "learning_rate": 0.00011523255813953489, "loss": 1.2305, "step": 730 },
    { "epoch": 1.72, "learning_rate": 0.00011406976744186046, "loss": 1.1766, "step": 740 },
    { "epoch": 1.74, "learning_rate": 0.00011290697674418604, "loss": 1.164, "step": 750 },
    { "epoch": 1.77, "learning_rate": 0.00011174418604651162, "loss": 1.2152, "step": 760 },
    { "epoch": 1.79, "learning_rate": 0.00011058139534883722, "loss": 1.0498, "step": 770 },
    { "epoch": 1.81, "learning_rate": 0.0001094186046511628, "loss": 1.1407, "step": 780 },
    { "epoch": 1.84, "learning_rate": 0.00010825581395348838, "loss": 1.1325, "step": 790 },
    { "epoch": 1.86, "learning_rate": 0.00010709302325581397, "loss": 1.0857, "step": 800 },
    { "epoch": 1.86, "eval_accuracy": 0.7112426035502959, "eval_loss": 1.1831614971160889, "eval_runtime": 19.5421, "eval_samples_per_second": 86.48, "eval_steps_per_second": 10.848, "step": 800 },
    { "epoch": 1.88, "learning_rate": 0.00010593023255813955, "loss": 0.9263, "step": 810 },
    { "epoch": 1.91, "learning_rate": 0.00010476744186046512, "loss": 0.9742, "step": 820 },
    { "epoch": 1.93, "learning_rate": 0.0001036046511627907, "loss": 1.0176, "step": 830 },
    { "epoch": 1.95, "learning_rate": 0.00010244186046511628, "loss": 1.0105, "step": 840 },
    { "epoch": 1.98, "learning_rate": 0.00010127906976744185, "loss": 1.0131, "step": 850 },
    { "epoch": 2.0, "learning_rate": 0.00010011627906976743, "loss": 0.9204, "step": 860 },
    { "epoch": 2.02, "learning_rate": 9.895348837209302e-05, "loss": 0.6221, "step": 870 },
    { "epoch": 2.05, "learning_rate": 9.779069767441861e-05, "loss": 0.6229, "step": 880 },
    { "epoch": 2.07, "learning_rate": 9.662790697674419e-05, "loss": 0.7227, "step": 890 },
    { "epoch": 2.09, "learning_rate": 9.546511627906978e-05, "loss": 0.582, "step": 900 },
    { "epoch": 2.09, "eval_accuracy": 0.7479289940828402, "eval_loss": 1.0371233224868774, "eval_runtime": 19.7303, "eval_samples_per_second": 85.655, "eval_steps_per_second": 10.745, "step": 900 },
    { "epoch": 2.12, "learning_rate": 9.430232558139536e-05, "loss": 0.6454, "step": 910 },
    { "epoch": 2.14, "learning_rate": 9.313953488372094e-05, "loss": 0.6227, "step": 920 },
    { "epoch": 2.16, "learning_rate": 9.197674418604651e-05, "loss": 0.4867, "step": 930 },
    { "epoch": 2.19, "learning_rate": 9.081395348837209e-05, "loss": 0.4923, "step": 940 },
    { "epoch": 2.21, "learning_rate": 8.965116279069767e-05, "loss": 0.5217, "step": 950 },
    { "epoch": 2.23, "learning_rate": 8.848837209302326e-05, "loss": 0.4372, "step": 960 },
    { "epoch": 2.26, "learning_rate": 8.732558139534884e-05, "loss": 0.567, "step": 970 },
    { "epoch": 2.28, "learning_rate": 8.616279069767443e-05, "loss": 0.5837, "step": 980 },
    { "epoch": 2.3, "learning_rate": 8.5e-05, "loss": 0.5339, "step": 990 },
    { "epoch": 2.33, "learning_rate": 8.383720930232558e-05, "loss": 0.5971, "step": 1000 },
    { "epoch": 2.33, "eval_accuracy": 0.7461538461538462, "eval_loss": 0.9839160442352295, "eval_runtime": 19.6324, "eval_samples_per_second": 86.082, "eval_steps_per_second": 10.798, "step": 1000 },
    { "epoch": 2.35, "learning_rate": 8.267441860465117e-05, "loss": 0.5588, "step": 1010 },
    { "epoch": 2.37, "learning_rate": 8.151162790697675e-05, "loss": 0.5793, "step": 1020 },
    { "epoch": 2.4, "learning_rate": 8.034883720930233e-05, "loss": 0.475, "step": 1030 },
    { "epoch": 2.42, "learning_rate": 7.91860465116279e-05, "loss": 0.5391, "step": 1040 },
    { "epoch": 2.44, "learning_rate": 7.802325581395348e-05, "loss": 0.487, "step": 1050 },
    { "epoch": 2.47, "learning_rate": 7.686046511627908e-05, "loss": 0.4474, "step": 1060 },
    { "epoch": 2.49, "learning_rate": 7.569767441860465e-05, "loss": 0.4753, "step": 1070 },
    { "epoch": 2.51, "learning_rate": 7.453488372093024e-05, "loss": 0.3932, "step": 1080 },
    { "epoch": 2.53, "learning_rate": 7.337209302325582e-05, "loss": 0.4907, "step": 1090 },
    { "epoch": 2.56, "learning_rate": 7.22093023255814e-05, "loss": 0.4617, "step": 1100 },
    { "epoch": 2.56, "eval_accuracy": 0.7656804733727811, "eval_loss": 0.9233230948448181, "eval_runtime": 19.3219, "eval_samples_per_second": 87.465, "eval_steps_per_second": 10.972, "step": 1100 },
    { "epoch": 2.58, "learning_rate": 7.104651162790698e-05, "loss": 0.4143, "step": 1110 },
    { "epoch": 2.6, "learning_rate": 6.988372093023257e-05, "loss": 0.5355, "step": 1120 },
    { "epoch": 2.63, "learning_rate": 6.872093023255814e-05, "loss": 0.4717, "step": 1130 },
    { "epoch": 2.65, "learning_rate": 6.755813953488372e-05, "loss": 0.3827, "step": 1140 },
    { "epoch": 2.67, "learning_rate": 6.63953488372093e-05, "loss": 0.443, "step": 1150 },
    { "epoch": 2.7, "learning_rate": 6.523255813953488e-05, "loss": 0.4257, "step": 1160 },
    { "epoch": 2.72, "learning_rate": 6.406976744186047e-05, "loss": 0.3875, "step": 1170 },
    { "epoch": 2.74, "learning_rate": 6.290697674418606e-05, "loss": 0.3956, "step": 1180 },
    { "epoch": 2.77, "learning_rate": 6.174418604651164e-05, "loss": 0.3629, "step": 1190 },
    { "epoch": 2.79, "learning_rate": 6.058139534883721e-05, "loss": 0.4621, "step": 1200 },
    { "epoch": 2.79, "eval_accuracy": 0.7828402366863906, "eval_loss": 0.8416844606399536, "eval_runtime": 19.2964, "eval_samples_per_second": 87.581, "eval_steps_per_second": 10.986, "step": 1200 },
    { "epoch": 2.81, "learning_rate": 5.941860465116279e-05, "loss": 0.4833, "step": 1210 },
    { "epoch": 2.84, "learning_rate": 5.825581395348837e-05, "loss": 0.3749, "step": 1220 },
    { "epoch": 2.86, "learning_rate": 5.709302325581396e-05, "loss": 0.4963, "step": 1230 },
    { "epoch": 2.88, "learning_rate": 5.5930232558139536e-05, "loss": 0.2743, "step": 1240 },
    { "epoch": 2.91, "learning_rate": 5.476744186046512e-05, "loss": 0.3608, "step": 1250 },
    { "epoch": 2.93, "learning_rate": 5.36046511627907e-05, "loss": 0.3941, "step": 1260 },
    { "epoch": 2.95, "learning_rate": 5.2441860465116275e-05, "loss": 0.3464, "step": 1270 },
    { "epoch": 2.98, "learning_rate": 5.1279069767441866e-05, "loss": 0.3461, "step": 1280 },
    { "epoch": 3.0, "learning_rate": 5.011627906976745e-05, "loss": 0.368, "step": 1290 },
    { "epoch": 3.02, "learning_rate": 4.895348837209303e-05, "loss": 0.2128, "step": 1300 },
    { "epoch": 3.02, "eval_accuracy": 0.7970414201183432, "eval_loss": 0.76435786485672, "eval_runtime": 19.4425, "eval_samples_per_second": 86.923, "eval_steps_per_second": 10.904, "step": 1300 },
    { "epoch": 3.05, "learning_rate": 4.7790697674418605e-05, "loss": 0.2484, "step": 1310 },
    { "epoch": 3.07, "learning_rate": 4.662790697674419e-05, "loss": 0.2213, "step": 1320 },
    { "epoch": 3.09, "learning_rate": 4.5465116279069766e-05, "loss": 0.2305, "step": 1330 },
    { "epoch": 3.12, "learning_rate": 4.430232558139535e-05, "loss": 0.208, "step": 1340 },
    { "epoch": 3.14, "learning_rate": 4.3139534883720935e-05, "loss": 0.2191, "step": 1350 },
    { "epoch": 3.16, "learning_rate": 4.197674418604651e-05, "loss": 0.2032, "step": 1360 },
    { "epoch": 3.19, "learning_rate": 4.0813953488372096e-05, "loss": 0.216, "step": 1370 },
    { "epoch": 3.21, "learning_rate": 3.9651162790697674e-05, "loss": 0.1977, "step": 1380 },
    { "epoch": 3.23, "learning_rate": 3.848837209302326e-05, "loss": 0.1767, "step": 1390 },
    { "epoch": 3.26, "learning_rate": 3.732558139534884e-05, "loss": 0.1883, "step": 1400 },
    { "epoch": 3.26, "eval_accuracy": 0.8183431952662722, "eval_loss": 0.7001345157623291, "eval_runtime": 19.4125, "eval_samples_per_second": 87.057, "eval_steps_per_second": 10.921, "step": 1400 },
    { "epoch": 3.28, "learning_rate": 3.616279069767442e-05, "loss": 0.1768, "step": 1410 },
    { "epoch": 3.3, "learning_rate": 3.5e-05, "loss": 0.1436, "step": 1420 },
    { "epoch": 3.33, "learning_rate": 3.383720930232558e-05, "loss": 0.1781, "step": 1430 },
    { "epoch": 3.35, "learning_rate": 3.2674418604651165e-05, "loss": 0.1816, "step": 1440 },
    { "epoch": 3.37, "learning_rate": 3.151162790697675e-05, "loss": 0.1516, "step": 1450 },
    { "epoch": 3.4, "learning_rate": 3.0348837209302327e-05, "loss": 0.182, "step": 1460 },
    { "epoch": 3.42, "learning_rate": 2.9186046511627908e-05, "loss": 0.1435, "step": 1470 },
    { "epoch": 3.44, "learning_rate": 2.8023255813953492e-05, "loss": 0.1575, "step": 1480 },
    { "epoch": 3.47, "learning_rate": 2.6860465116279073e-05, "loss": 0.1361, "step": 1490 },
    { "epoch": 3.49, "learning_rate": 2.569767441860465e-05, "loss": 0.1501, "step": 1500 },
    { "epoch": 3.49, "eval_accuracy": 0.8201183431952662, "eval_loss": 0.6825571060180664, "eval_runtime": 19.3331, "eval_samples_per_second": 87.415, "eval_steps_per_second": 10.966, "step": 1500 },
    { "epoch": 3.51, "learning_rate": 2.453488372093023e-05, "loss": 0.1719, "step": 1510 },
    { "epoch": 3.53, "learning_rate": 2.3372093023255815e-05, "loss": 0.2121, "step": 1520 },
    { "epoch": 3.56, "learning_rate": 2.2209302325581396e-05, "loss": 0.1895, "step": 1530 },
    { "epoch": 3.58, "learning_rate": 2.104651162790698e-05, "loss": 0.1466, "step": 1540 },
    { "epoch": 3.6, "learning_rate": 1.9883720930232557e-05, "loss": 0.2179, "step": 1550 },
    { "epoch": 3.63, "learning_rate": 1.8720930232558138e-05, "loss": 0.1219, "step": 1560 },
    { "epoch": 3.65, "learning_rate": 1.7558139534883722e-05, "loss": 0.1275, "step": 1570 },
    { "epoch": 3.67, "learning_rate": 1.6395348837209303e-05, "loss": 0.177, "step": 1580 },
    { "epoch": 3.7, "learning_rate": 1.5232558139534884e-05, "loss": 0.1242, "step": 1590 },
    { "epoch": 3.72, "learning_rate": 1.4069767441860465e-05, "loss": 0.1626, "step": 1600 },
    { "epoch": 3.72, "eval_accuracy": 0.8254437869822485, "eval_loss": 0.6568493843078613, "eval_runtime": 19.3691, "eval_samples_per_second": 87.253, "eval_steps_per_second": 10.945, "step": 1600 },
    { "epoch": 3.74, "learning_rate": 1.2906976744186047e-05, "loss": 0.1078, "step": 1610 },
    { "epoch": 3.77, "learning_rate": 1.1744186046511628e-05, "loss": 0.1073, "step": 1620 },
    { "epoch": 3.79, "learning_rate": 1.058139534883721e-05, "loss": 0.106, "step": 1630 },
    { "epoch": 3.81, "learning_rate": 9.418604651162791e-06, "loss": 0.1707, "step": 1640 },
    { "epoch": 3.84, "learning_rate": 8.255813953488372e-06, "loss": 0.1292, "step": 1650 },
    { "epoch": 3.86, "learning_rate": 7.093023255813954e-06, "loss": 0.1311, "step": 1660 },
    { "epoch": 3.88, "learning_rate": 5.930232558139535e-06, "loss": 0.1477, "step": 1670 },
    { "epoch": 3.91, "learning_rate": 4.767441860465117e-06, "loss": 0.1223, "step": 1680 },
    { "epoch": 3.93, "learning_rate": 3.604651162790698e-06, "loss": 0.1227, "step": 1690 },
    { "epoch": 3.95, "learning_rate": 2.4418604651162793e-06, "loss": 0.1053, "step": 1700 },
    { "epoch": 3.95, "eval_accuracy": 0.827810650887574, "eval_loss": 0.6401488780975342, "eval_runtime": 19.3377, "eval_samples_per_second": 87.394, "eval_steps_per_second": 10.963, "step": 1700 },
    { "epoch": 3.98, "learning_rate": 1.2790697674418605e-06, "loss": 0.1335, "step": 1710 },
    { "epoch": 4.0, "learning_rate": 1.1627906976744187e-07, "loss": 0.0918, "step": 1720 },
    { "epoch": 4.0, "step": 1720, "total_flos": 2.1353892586861363e+18, "train_loss": 1.3106378566387087, "train_runtime": 1021.3737, "train_samples_per_second": 26.944, "train_steps_per_second": 1.684 }
  ],
  "max_steps": 1720,
  "num_train_epochs": 4,
  "total_flos": 2.1353892586861363e+18,
  "trial_name": null,
  "trial_params": null
}