|
{ |
|
"best_metric": 0.9935, |
|
"best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-mnist/checkpoint-2811", |
|
"epoch": 2.9994666666666667, |
|
"global_step": 2811, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.7730496453900712e-06, |
|
"loss": 2.3843, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.5460992907801423e-06, |
|
"loss": 2.27, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.319148936170213e-06, |
|
"loss": 2.1311, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.092198581560285e-06, |
|
"loss": 1.9981, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.865248226950355e-06, |
|
"loss": 1.761, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0638297872340426e-05, |
|
"loss": 1.4946, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.2411347517730498e-05, |
|
"loss": 1.2439, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.418439716312057e-05, |
|
"loss": 1.0823, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.595744680851064e-05, |
|
"loss": 0.9241, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.773049645390071e-05, |
|
"loss": 0.8283, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.950354609929078e-05, |
|
"loss": 0.8041, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.1276595744680852e-05, |
|
"loss": 0.6482, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3049645390070924e-05, |
|
"loss": 0.6448, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.4822695035460995e-05, |
|
"loss": 0.5983, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.6595744680851064e-05, |
|
"loss": 0.5754, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.836879432624114e-05, |
|
"loss": 0.5485, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.0141843971631207e-05, |
|
"loss": 0.5681, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.191489361702128e-05, |
|
"loss": 0.48, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.3687943262411347e-05, |
|
"loss": 0.5461, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.546099290780142e-05, |
|
"loss": 0.5761, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.723404255319149e-05, |
|
"loss": 0.4957, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.900709219858156e-05, |
|
"loss": 0.4965, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.078014184397163e-05, |
|
"loss": 0.473, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2553191489361704e-05, |
|
"loss": 0.4948, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.432624113475177e-05, |
|
"loss": 0.5289, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.609929078014185e-05, |
|
"loss": 0.4895, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.787234042553192e-05, |
|
"loss": 0.5204, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.964539007092199e-05, |
|
"loss": 0.5589, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.984183471727956e-05, |
|
"loss": 0.4661, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.964412811387901e-05, |
|
"loss": 0.4783, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944642151047845e-05, |
|
"loss": 0.5219, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.92487149070779e-05, |
|
"loss": 0.5128, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.905100830367734e-05, |
|
"loss": 0.4896, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8853301700276796e-05, |
|
"loss": 0.4123, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.865559509687624e-05, |
|
"loss": 0.4216, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.8457888493475686e-05, |
|
"loss": 0.4611, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.826018189007513e-05, |
|
"loss": 0.469, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.806247528667458e-05, |
|
"loss": 0.4072, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.786476868327402e-05, |
|
"loss": 0.4476, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.766706207987347e-05, |
|
"loss": 0.4469, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.746935547647291e-05, |
|
"loss": 0.4529, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.727164887307236e-05, |
|
"loss": 0.4699, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.707394226967181e-05, |
|
"loss": 0.4249, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.687623566627126e-05, |
|
"loss": 0.439, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.66785290628707e-05, |
|
"loss": 0.4137, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6480822459470146e-05, |
|
"loss": 0.5142, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6283115856069595e-05, |
|
"loss": 0.4532, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.608540925266904e-05, |
|
"loss": 0.4514, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.588770264926849e-05, |
|
"loss": 0.4166, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.568999604586793e-05, |
|
"loss": 0.4584, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.549228944246738e-05, |
|
"loss": 0.3461, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.529458283906683e-05, |
|
"loss": 0.4783, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5096876235666277e-05, |
|
"loss": 0.4474, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.489916963226572e-05, |
|
"loss": 0.4403, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4701463028865166e-05, |
|
"loss": 0.3663, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.450375642546461e-05, |
|
"loss": 0.3791, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.430604982206406e-05, |
|
"loss": 0.4573, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4108343218663504e-05, |
|
"loss": 0.3962, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.391063661526295e-05, |
|
"loss": 0.3705, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.371293001186239e-05, |
|
"loss": 0.4082, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.351522340846185e-05, |
|
"loss": 0.4173, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.331751680506129e-05, |
|
"loss": 0.4122, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.311981020166074e-05, |
|
"loss": 0.4279, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2922103598260186e-05, |
|
"loss": 0.3369, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.272439699485963e-05, |
|
"loss": 0.3995, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.252669039145908e-05, |
|
"loss": 0.3735, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.232898378805852e-05, |
|
"loss": 0.356, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.213127718465797e-05, |
|
"loss": 0.3741, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.193357058125741e-05, |
|
"loss": 0.4291, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.173586397785687e-05, |
|
"loss": 0.4473, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.153815737445631e-05, |
|
"loss": 0.3914, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.134045077105576e-05, |
|
"loss": 0.3241, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.11427441676552e-05, |
|
"loss": 0.3986, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0945037564254647e-05, |
|
"loss": 0.4287, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0747330960854095e-05, |
|
"loss": 0.4011, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.054962435745354e-05, |
|
"loss": 0.388, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0351917754052984e-05, |
|
"loss": 0.3904, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.015421115065243e-05, |
|
"loss": 0.3528, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.995650454725188e-05, |
|
"loss": 0.3805, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.975879794385133e-05, |
|
"loss": 0.3698, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.956109134045078e-05, |
|
"loss": 0.3454, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.936338473705022e-05, |
|
"loss": 0.4112, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9165678133649666e-05, |
|
"loss": 0.344, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8967971530249114e-05, |
|
"loss": 0.3694, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.877026492684856e-05, |
|
"loss": 0.3867, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8572558323448004e-05, |
|
"loss": 0.3531, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.837485172004745e-05, |
|
"loss": 0.397, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.817714511664689e-05, |
|
"loss": 0.3702, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797943851324635e-05, |
|
"loss": 0.3751, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.778173190984579e-05, |
|
"loss": 0.3294, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.758402530644524e-05, |
|
"loss": 0.3028, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.738631870304468e-05, |
|
"loss": 0.4089, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7188612099644134e-05, |
|
"loss": 0.3376, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9855, |
|
"eval_loss": 0.044587597250938416, |
|
"eval_runtime": 131.1221, |
|
"eval_samples_per_second": 76.265, |
|
"eval_steps_per_second": 4.767, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.6990905496243575e-05, |
|
"loss": 0.4091, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.679319889284302e-05, |
|
"loss": 0.3623, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.6595492289442465e-05, |
|
"loss": 0.3532, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.639778568604191e-05, |
|
"loss": 0.3457, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.620007908264136e-05, |
|
"loss": 0.3359, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.600237247924081e-05, |
|
"loss": 0.3162, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.580466587584026e-05, |
|
"loss": 0.3081, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.56069592724397e-05, |
|
"loss": 0.4274, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.540925266903915e-05, |
|
"loss": 0.3806, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.5211546065638595e-05, |
|
"loss": 0.4025, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.501383946223804e-05, |
|
"loss": 0.3973, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4816132858837484e-05, |
|
"loss": 0.3603, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.461842625543693e-05, |
|
"loss": 0.3805, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.442071965203638e-05, |
|
"loss": 0.3542, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.422301304863583e-05, |
|
"loss": 0.3334, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.402530644523527e-05, |
|
"loss": 0.3736, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.382759984183472e-05, |
|
"loss": 0.3524, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.3629893238434166e-05, |
|
"loss": 0.4211, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.3432186635033614e-05, |
|
"loss": 0.3224, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3234480031633056e-05, |
|
"loss": 0.3446, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.3036773428232504e-05, |
|
"loss": 0.3062, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.283906682483195e-05, |
|
"loss": 0.3114, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.26413602214314e-05, |
|
"loss": 0.3816, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.244365361803085e-05, |
|
"loss": 0.3216, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.224594701463029e-05, |
|
"loss": 0.3618, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.204824041122974e-05, |
|
"loss": 0.3515, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.185053380782918e-05, |
|
"loss": 0.3023, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.1652827204428634e-05, |
|
"loss": 0.3348, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1455120601028075e-05, |
|
"loss": 0.3047, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.125741399762752e-05, |
|
"loss": 0.4023, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.1059707394226965e-05, |
|
"loss": 0.3379, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.086200079082642e-05, |
|
"loss": 0.2975, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.066429418742586e-05, |
|
"loss": 0.3445, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.046658758402531e-05, |
|
"loss": 0.3401, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0268880980624754e-05, |
|
"loss": 0.2998, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.00711743772242e-05, |
|
"loss": 0.289, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.987346777382365e-05, |
|
"loss": 0.3816, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9675761170423095e-05, |
|
"loss": 0.3213, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.947805456702254e-05, |
|
"loss": 0.3604, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9280347963621984e-05, |
|
"loss": 0.2953, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9082641360221436e-05, |
|
"loss": 0.3474, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.888493475682088e-05, |
|
"loss": 0.3291, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8687228153420325e-05, |
|
"loss": 0.3229, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.848952155001977e-05, |
|
"loss": 0.3118, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8291814946619215e-05, |
|
"loss": 0.2957, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8094108343218666e-05, |
|
"loss": 0.3061, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.789640173981811e-05, |
|
"loss": 0.3489, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7698695136417556e-05, |
|
"loss": 0.3754, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7500988533017004e-05, |
|
"loss": 0.3633, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7303281929616452e-05, |
|
"loss": 0.3709, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.71055753262159e-05, |
|
"loss": 0.3616, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6907868722815345e-05, |
|
"loss": 0.3712, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.671016211941479e-05, |
|
"loss": 0.2578, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6512455516014234e-05, |
|
"loss": 0.3222, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.6314748912613686e-05, |
|
"loss": 0.323, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.611704230921313e-05, |
|
"loss": 0.3349, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5919335705812575e-05, |
|
"loss": 0.3152, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.572162910241202e-05, |
|
"loss": 0.3529, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5523922499011465e-05, |
|
"loss": 0.2843, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5326215895610916e-05, |
|
"loss": 0.3035, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.512850929221036e-05, |
|
"loss": 0.3331, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.4930802688809806e-05, |
|
"loss": 0.4147, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4733096085409254e-05, |
|
"loss": 0.2968, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.45353894820087e-05, |
|
"loss": 0.3199, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4337682878608147e-05, |
|
"loss": 0.3143, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.413997627520759e-05, |
|
"loss": 0.3115, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.394226967180704e-05, |
|
"loss": 0.3396, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3744563068406488e-05, |
|
"loss": 0.375, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3546856465005932e-05, |
|
"loss": 0.3709, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.334914986160538e-05, |
|
"loss": 0.2714, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3151443258204825e-05, |
|
"loss": 0.3532, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2953736654804273e-05, |
|
"loss": 0.3799, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2756030051403718e-05, |
|
"loss": 0.3241, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2558323448003166e-05, |
|
"loss": 0.368, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.236061684460261e-05, |
|
"loss": 0.3099, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2162910241202056e-05, |
|
"loss": 0.3517, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.1965203637801504e-05, |
|
"loss": 0.2803, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.176749703440095e-05, |
|
"loss": 0.3505, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1569790431000397e-05, |
|
"loss": 0.3268, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.137208382759984e-05, |
|
"loss": 0.2774, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.117437722419929e-05, |
|
"loss": 0.3158, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0976670620798734e-05, |
|
"loss": 0.3687, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0778964017398182e-05, |
|
"loss": 0.3239, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.058125741399763e-05, |
|
"loss": 0.3351, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0383550810597075e-05, |
|
"loss": 0.341, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0185844207196523e-05, |
|
"loss": 0.2696, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9988137603795968e-05, |
|
"loss": 0.3837, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9790431000395416e-05, |
|
"loss": 0.3441, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.959272439699486e-05, |
|
"loss": 0.258, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9395017793594306e-05, |
|
"loss": 0.3381, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9197311190193754e-05, |
|
"loss": 0.2887, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.89996045867932e-05, |
|
"loss": 0.3031, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8801897983392647e-05, |
|
"loss": 0.3168, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.860419137999209e-05, |
|
"loss": 0.318, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9916, |
|
"eval_loss": 0.026178531348705292, |
|
"eval_runtime": 131.1473, |
|
"eval_samples_per_second": 76.25, |
|
"eval_steps_per_second": 4.766, |
|
"step": 1874 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.840648477659154e-05, |
|
"loss": 0.3821, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8208778173190984e-05, |
|
"loss": 0.3132, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.8011071569790432e-05, |
|
"loss": 0.3423, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7813364966389877e-05, |
|
"loss": 0.3407, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7615658362989322e-05, |
|
"loss": 0.2871, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.741795175958877e-05, |
|
"loss": 0.2987, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7220245156188218e-05, |
|
"loss": 0.3316, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7022538552787666e-05, |
|
"loss": 0.263, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.682483194938711e-05, |
|
"loss": 0.2606, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.662712534598656e-05, |
|
"loss": 0.2799, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6429418742586004e-05, |
|
"loss": 0.3024, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.623171213918545e-05, |
|
"loss": 0.2587, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6034005535784897e-05, |
|
"loss": 0.2674, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.583629893238434e-05, |
|
"loss": 0.292, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.563859232898379e-05, |
|
"loss": 0.2998, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5440885725583234e-05, |
|
"loss": 0.2867, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5243179122182682e-05, |
|
"loss": 0.2567, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5045472518782127e-05, |
|
"loss": 0.3127, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4847765915381575e-05, |
|
"loss": 0.3159, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4650059311981022e-05, |
|
"loss": 0.2666, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4452352708580466e-05, |
|
"loss": 0.3152, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4254646105179915e-05, |
|
"loss": 0.2772, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.405693950177936e-05, |
|
"loss": 0.3362, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3859232898378807e-05, |
|
"loss": 0.2909, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3661526294978252e-05, |
|
"loss": 0.3371, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.34638196915777e-05, |
|
"loss": 0.3314, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3266113088177145e-05, |
|
"loss": 0.3195, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3068406484776591e-05, |
|
"loss": 0.2603, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.287069988137604e-05, |
|
"loss": 0.2797, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2672993277975484e-05, |
|
"loss": 0.2906, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2475286674574932e-05, |
|
"loss": 0.342, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2277580071174377e-05, |
|
"loss": 0.3176, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.2079873467773824e-05, |
|
"loss": 0.271, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.188216686437327e-05, |
|
"loss": 0.3251, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1684460260972716e-05, |
|
"loss": 0.2421, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1486753657572163e-05, |
|
"loss": 0.3071, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.128904705417161e-05, |
|
"loss": 0.3017, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1091340450771057e-05, |
|
"loss": 0.3107, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0893633847370504e-05, |
|
"loss": 0.2682, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0695927243969949e-05, |
|
"loss": 0.3018, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0498220640569395e-05, |
|
"loss": 0.3179, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0300514037168841e-05, |
|
"loss": 0.3172, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0102807433768288e-05, |
|
"loss": 0.3421, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.905100830367734e-06, |
|
"loss": 0.2955, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.70739422696718e-06, |
|
"loss": 0.3146, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.509687623566627e-06, |
|
"loss": 0.2883, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.311981020166075e-06, |
|
"loss": 0.3252, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.11427441676552e-06, |
|
"loss": 0.3297, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.916567813364967e-06, |
|
"loss": 0.3002, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.718861209964413e-06, |
|
"loss": 0.2354, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.52115460656386e-06, |
|
"loss": 0.2808, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.323448003163306e-06, |
|
"loss": 0.3158, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.125741399762752e-06, |
|
"loss": 0.3408, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.928034796362199e-06, |
|
"loss": 0.3253, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.730328192961645e-06, |
|
"loss": 0.2898, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.532621589561091e-06, |
|
"loss": 0.3144, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.334914986160538e-06, |
|
"loss": 0.3143, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.137208382759984e-06, |
|
"loss": 0.2758, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.939501779359431e-06, |
|
"loss": 0.2996, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.741795175958877e-06, |
|
"loss": 0.3002, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.544088572558324e-06, |
|
"loss": 0.3579, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.346381969157771e-06, |
|
"loss": 0.2395, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.1486753657572165e-06, |
|
"loss": 0.242, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.950968762356663e-06, |
|
"loss": 0.2256, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.753262158956109e-06, |
|
"loss": 0.2807, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.3264, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.357848952155002e-06, |
|
"loss": 0.323, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.160142348754449e-06, |
|
"loss": 0.2416, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.962435745353895e-06, |
|
"loss": 0.2796, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.7647291419533415e-06, |
|
"loss": 0.2513, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.567022538552788e-06, |
|
"loss": 0.2681, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.369315935152234e-06, |
|
"loss": 0.3121, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.171609331751681e-06, |
|
"loss": 0.2579, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.973902728351126e-06, |
|
"loss": 0.2899, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.7761961249505733e-06, |
|
"loss": 0.2676, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.57848952155002e-06, |
|
"loss": 0.2652, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3807829181494666e-06, |
|
"loss": 0.2804, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.1830763147489126e-06, |
|
"loss": 0.2689, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.985369711348359e-06, |
|
"loss": 0.2918, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.787663107947806e-06, |
|
"loss": 0.2602, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.589956504547252e-06, |
|
"loss": 0.2595, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3922499011466983e-06, |
|
"loss": 0.268, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1945432977461447e-06, |
|
"loss": 0.238, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.996836694345591e-06, |
|
"loss": 0.2787, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7991300909450376e-06, |
|
"loss": 0.2723, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6014234875444842e-06, |
|
"loss": 0.2974, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4037168841439304e-06, |
|
"loss": 0.2928, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2060102807433769e-06, |
|
"loss": 0.2526, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.0083036773428233e-06, |
|
"loss": 0.2957, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.105970739422697e-07, |
|
"loss": 0.2813, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 6.12890470541716e-07, |
|
"loss": 0.3048, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.151838671411625e-07, |
|
"loss": 0.2735, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.1747726374060896e-07, |
|
"loss": 0.2681, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.977066034005536e-08, |
|
"loss": 0.2374, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9935, |
|
"eval_loss": 0.02018115483224392, |
|
"eval_runtime": 131.0943, |
|
"eval_samples_per_second": 76.281, |
|
"eval_steps_per_second": 4.768, |
|
"step": 2811 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2811, |
|
"total_flos": 1.3941117625304089e+19, |
|
"train_loss": 0.3983713053842746, |
|
"train_runtime": 6676.1657, |
|
"train_samples_per_second": 26.962, |
|
"train_steps_per_second": 0.421 |
|
} |
|
], |
|
"max_steps": 2811, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.3941117625304089e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|