|
{ |
|
"best_metric": 0.8123190611646329, |
|
"best_model_checkpoint": "output/roberta-large-question-classifier/checkpoint-2563", |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 6990, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.430615164520744e-06, |
|
"loss": 2.3372, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.861230329041488e-06, |
|
"loss": 2.276, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.291845493562232e-06, |
|
"loss": 2.1988, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.722460658082976e-06, |
|
"loss": 1.9467, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.4050404697492347, |
|
"eval_loss": 1.3099409341812134, |
|
"eval_runtime": 1.3906, |
|
"eval_samples_per_second": 417.1, |
|
"eval_steps_per_second": 1.438, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 7.15307582260372e-06, |
|
"loss": 1.5551, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.583690987124465e-06, |
|
"loss": 1.0537, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0014306151645208e-05, |
|
"loss": 0.872, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.1444921316165953e-05, |
|
"loss": 0.6619, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.2875536480686697e-05, |
|
"loss": 0.6381, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.7785421184302428, |
|
"eval_loss": 0.5586220622062683, |
|
"eval_runtime": 1.4464, |
|
"eval_samples_per_second": 400.997, |
|
"eval_steps_per_second": 1.383, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.430615164520744e-05, |
|
"loss": 0.509, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.5736766809728185e-05, |
|
"loss": 0.5387, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.716738197424893e-05, |
|
"loss": 0.5163, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.859799713876967e-05, |
|
"loss": 0.628, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.7831151120797589, |
|
"eval_loss": 0.6418800354003906, |
|
"eval_runtime": 1.467, |
|
"eval_samples_per_second": 395.356, |
|
"eval_steps_per_second": 1.363, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.9996820855189955e-05, |
|
"loss": 0.5632, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.983786361468765e-05, |
|
"loss": 0.4046, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.9678906374185345e-05, |
|
"loss": 0.3985, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 1.951994913368304e-05, |
|
"loss": 0.4307, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.9360991893180737e-05, |
|
"loss": 0.4487, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.8093842888236766, |
|
"eval_loss": 0.5770355463027954, |
|
"eval_runtime": 1.4647, |
|
"eval_samples_per_second": 395.985, |
|
"eval_steps_per_second": 1.365, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.9202034652678432e-05, |
|
"loss": 0.3373, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.9043077412176127e-05, |
|
"loss": 0.2578, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.888412017167382e-05, |
|
"loss": 0.2675, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.8725162931171516e-05, |
|
"loss": 0.2697, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.8566205690669214e-05, |
|
"loss": 0.3319, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.7952503005676876, |
|
"eval_loss": 0.7712982296943665, |
|
"eval_runtime": 1.5475, |
|
"eval_samples_per_second": 374.79, |
|
"eval_steps_per_second": 1.292, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 1.8407248450166905e-05, |
|
"loss": 0.2049, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 1.82482912096646e-05, |
|
"loss": 0.2344, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 1.8089333969162298e-05, |
|
"loss": 0.1843, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 1.7930376728659993e-05, |
|
"loss": 0.2095, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.8017807103839256, |
|
"eval_loss": 0.8798965811729431, |
|
"eval_runtime": 1.4572, |
|
"eval_samples_per_second": 398.025, |
|
"eval_steps_per_second": 1.372, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 1.7771419488157687e-05, |
|
"loss": 0.2039, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 1.7612462247655382e-05, |
|
"loss": 0.0876, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 1.7453505007153077e-05, |
|
"loss": 0.1054, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.7294547766650775e-05, |
|
"loss": 0.1629, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.7135590526148466e-05, |
|
"loss": 0.1355, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.7961224122154954, |
|
"eval_loss": 1.0646474361419678, |
|
"eval_runtime": 1.4774, |
|
"eval_samples_per_second": 392.581, |
|
"eval_steps_per_second": 1.354, |
|
"step": 1631 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.6976633285646164e-05, |
|
"loss": 0.1457, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.681767604514386e-05, |
|
"loss": 0.0861, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 1.6658718804641553e-05, |
|
"loss": 0.0852, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 1.6499761564139248e-05, |
|
"loss": 0.1283, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 1.6340804323636943e-05, |
|
"loss": 0.0956, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.7998718228606326, |
|
"eval_loss": 1.2174800634384155, |
|
"eval_runtime": 1.5358, |
|
"eval_samples_per_second": 377.658, |
|
"eval_steps_per_second": 1.302, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 1.6181847083134637e-05, |
|
"loss": 0.0862, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 1.6022889842632335e-05, |
|
"loss": 0.0486, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 1.5863932602130026e-05, |
|
"loss": 0.0321, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 1.5704975361627725e-05, |
|
"loss": 0.0687, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.789186529273271, |
|
"eval_loss": 1.3646652698516846, |
|
"eval_runtime": 1.5089, |
|
"eval_samples_per_second": 384.398, |
|
"eval_steps_per_second": 1.326, |
|
"step": 2097 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 1.554601812112542e-05, |
|
"loss": 0.0522, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 1.5387060880623114e-05, |
|
"loss": 0.0349, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 1.5228103640120809e-05, |
|
"loss": 0.0529, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 1.5069146399618503e-05, |
|
"loss": 0.0284, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 1.49101891591162e-05, |
|
"loss": 0.0371, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.7986917021269787, |
|
"eval_loss": 1.3809223175048828, |
|
"eval_runtime": 1.6909, |
|
"eval_samples_per_second": 343.007, |
|
"eval_steps_per_second": 1.183, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 1.4751231918613892e-05, |
|
"loss": 0.0143, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 1.4592274678111589e-05, |
|
"loss": 0.0012, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 1.4433317437609285e-05, |
|
"loss": 0.0117, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 1.427436019710698e-05, |
|
"loss": 0.0248, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 1.4115402956604673e-05, |
|
"loss": 0.0303, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.8123190611646329, |
|
"eval_loss": 1.3591104745864868, |
|
"eval_runtime": 1.57, |
|
"eval_samples_per_second": 369.434, |
|
"eval_steps_per_second": 1.274, |
|
"step": 2563 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 1.395644571610237e-05, |
|
"loss": 0.0142, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 1.3797488475600066e-05, |
|
"loss": 0.0136, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 1.363853123509776e-05, |
|
"loss": 0.0126, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 1.3479573994595455e-05, |
|
"loss": 0.0263, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.8100291935535177, |
|
"eval_loss": 1.5316766500473022, |
|
"eval_runtime": 1.5184, |
|
"eval_samples_per_second": 381.982, |
|
"eval_steps_per_second": 1.317, |
|
"step": 2796 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 1.332061675409315e-05, |
|
"loss": 0.011, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 1.3161659513590846e-05, |
|
"loss": 0.0002, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 1.300270227308854e-05, |
|
"loss": 0.0057, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 1.2843745032586235e-05, |
|
"loss": 0.0016, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 1.268478779208393e-05, |
|
"loss": 0.0144, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.7959241618420011, |
|
"eval_loss": 1.5725551843643188, |
|
"eval_runtime": 1.4849, |
|
"eval_samples_per_second": 390.601, |
|
"eval_steps_per_second": 1.347, |
|
"step": 3029 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 1.2525830551581626e-05, |
|
"loss": 0.006, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 1.236687331107932e-05, |
|
"loss": 0.0056, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 1.2207916070577015e-05, |
|
"loss": 0.0114, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 1.204895883007471e-05, |
|
"loss": 0.021, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 1.1890001589572406e-05, |
|
"loss": 0.0436, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.7987626313618129, |
|
"eval_loss": 1.6159876585006714, |
|
"eval_runtime": 1.4555, |
|
"eval_samples_per_second": 398.497, |
|
"eval_steps_per_second": 1.374, |
|
"step": 3262 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 1.1731044349070103e-05, |
|
"loss": 0.0002, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 1.1572087108567796e-05, |
|
"loss": 0.0062, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 1.141312986806549e-05, |
|
"loss": 0.0056, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 1.1254172627563187e-05, |
|
"loss": 0.0048, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.7957479636902922, |
|
"eval_loss": 1.6826026439666748, |
|
"eval_runtime": 1.4617, |
|
"eval_samples_per_second": 396.789, |
|
"eval_steps_per_second": 1.368, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 1.1095215387060883e-05, |
|
"loss": 0.0039, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 1.0936258146558576e-05, |
|
"loss": 0.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 1.0777300906056272e-05, |
|
"loss": 0.0236, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 1.0618343665553967e-05, |
|
"loss": 0.0004, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 1.0459386425051663e-05, |
|
"loss": 0.0001, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.7956639409293647, |
|
"eval_loss": 1.6912556886672974, |
|
"eval_runtime": 1.4563, |
|
"eval_samples_per_second": 398.278, |
|
"eval_steps_per_second": 1.373, |
|
"step": 3728 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 1.0300429184549356e-05, |
|
"loss": 0.0002, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 1.0141471944047053e-05, |
|
"loss": 0.0002, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 9.982514703544747e-06, |
|
"loss": 0.0006, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 9.823557463042442e-06, |
|
"loss": 0.0002, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 9.664600222540137e-06, |
|
"loss": 0.0001, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_f1": 0.7994751240525658, |
|
"eval_loss": 1.7075979709625244, |
|
"eval_runtime": 1.4886, |
|
"eval_samples_per_second": 389.634, |
|
"eval_steps_per_second": 1.344, |
|
"step": 3961 |
|
}, |
|
{ |
|
"epoch": 17.17, |
|
"learning_rate": 9.505642982037833e-06, |
|
"loss": 0.0002, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"learning_rate": 9.346685741535528e-06, |
|
"loss": 0.0185, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 9.187728501033222e-06, |
|
"loss": 0.0001, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 9.028771260530917e-06, |
|
"loss": 0.0034, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_f1": 0.7960354805040918, |
|
"eval_loss": 1.8018221855163574, |
|
"eval_runtime": 1.5408, |
|
"eval_samples_per_second": 376.422, |
|
"eval_steps_per_second": 1.298, |
|
"step": 4194 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 8.869814020028613e-06, |
|
"loss": 0.013, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 8.710856779526308e-06, |
|
"loss": 0.0003, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 8.551899539024003e-06, |
|
"loss": 0.0001, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 8.392942298521697e-06, |
|
"loss": 0.0002, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 18.88, |
|
"learning_rate": 8.233985058019394e-06, |
|
"loss": 0.0228, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_f1": 0.7915974698658704, |
|
"eval_loss": 1.7456856966018677, |
|
"eval_runtime": 1.4762, |
|
"eval_samples_per_second": 392.912, |
|
"eval_steps_per_second": 1.355, |
|
"step": 4427 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"learning_rate": 8.075027817517088e-06, |
|
"loss": 0.0006, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 7.916070577014783e-06, |
|
"loss": 0.0037, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 19.53, |
|
"learning_rate": 7.757113336512478e-06, |
|
"loss": 0.0314, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 19.74, |
|
"learning_rate": 7.598156096010174e-06, |
|
"loss": 0.0028, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 19.96, |
|
"learning_rate": 7.439198855507869e-06, |
|
"loss": 0.0083, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_f1": 0.7868576028090374, |
|
"eval_loss": 1.9279075860977173, |
|
"eval_runtime": 1.4679, |
|
"eval_samples_per_second": 395.119, |
|
"eval_steps_per_second": 1.362, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 20.17, |
|
"learning_rate": 7.280241615005564e-06, |
|
"loss": 0.0009, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 20.39, |
|
"learning_rate": 7.121284374503259e-06, |
|
"loss": 0.0002, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 20.6, |
|
"learning_rate": 6.962327134000954e-06, |
|
"loss": 0.0082, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"learning_rate": 6.803369893498649e-06, |
|
"loss": 0.0001, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_f1": 0.7915377946685866, |
|
"eval_loss": 1.8367053270339966, |
|
"eval_runtime": 2.0999, |
|
"eval_samples_per_second": 276.201, |
|
"eval_steps_per_second": 0.952, |
|
"step": 4893 |
|
}, |
|
{ |
|
"epoch": 21.03, |
|
"learning_rate": 6.6444126529963445e-06, |
|
"loss": 0.0001, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 21.24, |
|
"learning_rate": 6.485455412494039e-06, |
|
"loss": 0.0072, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 21.46, |
|
"learning_rate": 6.326498171991735e-06, |
|
"loss": 0.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 6.167540931489429e-06, |
|
"loss": 0.0, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"learning_rate": 6.008583690987126e-06, |
|
"loss": 0.0003, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_f1": 0.7842117575951872, |
|
"eval_loss": 1.8620420694351196, |
|
"eval_runtime": 1.8603, |
|
"eval_samples_per_second": 311.785, |
|
"eval_steps_per_second": 1.075, |
|
"step": 5126 |
|
}, |
|
{ |
|
"epoch": 22.1, |
|
"learning_rate": 5.8496264504848195e-06, |
|
"loss": 0.0007, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 22.32, |
|
"learning_rate": 5.690669209982516e-06, |
|
"loss": 0.0, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 22.53, |
|
"learning_rate": 5.5317119694802105e-06, |
|
"loss": 0.0021, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 22.75, |
|
"learning_rate": 5.372754728977906e-06, |
|
"loss": 0.0077, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"learning_rate": 5.213797488475601e-06, |
|
"loss": 0.0002, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_f1": 0.7828476594276503, |
|
"eval_loss": 1.919188141822815, |
|
"eval_runtime": 1.4859, |
|
"eval_samples_per_second": 390.344, |
|
"eval_steps_per_second": 1.346, |
|
"step": 5359 |
|
}, |
|
{ |
|
"epoch": 23.18, |
|
"learning_rate": 5.054840247973296e-06, |
|
"loss": 0.0194, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 23.39, |
|
"learning_rate": 4.895883007470991e-06, |
|
"loss": 0.0132, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 23.61, |
|
"learning_rate": 4.7369257669686855e-06, |
|
"loss": 0.0001, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 23.82, |
|
"learning_rate": 4.577968526466381e-06, |
|
"loss": 0.0, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_f1": 0.7927310235612234, |
|
"eval_loss": 1.9081404209136963, |
|
"eval_runtime": 1.4831, |
|
"eval_samples_per_second": 391.082, |
|
"eval_steps_per_second": 1.349, |
|
"step": 5592 |
|
}, |
|
{ |
|
"epoch": 24.03, |
|
"learning_rate": 4.419011285964076e-06, |
|
"loss": 0.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"learning_rate": 4.260054045461771e-06, |
|
"loss": 0.0001, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 24.46, |
|
"learning_rate": 4.101096804959467e-06, |
|
"loss": 0.0122, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 24.68, |
|
"learning_rate": 3.942139564457161e-06, |
|
"loss": 0.0, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 3.7831823239548564e-06, |
|
"loss": 0.0003, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_f1": 0.7812550199347442, |
|
"eval_loss": 1.9822450876235962, |
|
"eval_runtime": 1.5174, |
|
"eval_samples_per_second": 382.228, |
|
"eval_steps_per_second": 1.318, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 25.11, |
|
"learning_rate": 3.6242250834525515e-06, |
|
"loss": 0.0116, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 3.4652678429502466e-06, |
|
"loss": 0.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 25.54, |
|
"learning_rate": 3.306310602447942e-06, |
|
"loss": 0.0004, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 25.75, |
|
"learning_rate": 3.147353361945637e-06, |
|
"loss": 0.0027, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"learning_rate": 2.9883961214433322e-06, |
|
"loss": 0.0059, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_f1": 0.7953953204096383, |
|
"eval_loss": 1.8736791610717773, |
|
"eval_runtime": 1.4646, |
|
"eval_samples_per_second": 396.003, |
|
"eval_steps_per_second": 1.366, |
|
"step": 6058 |
|
}, |
|
{ |
|
"epoch": 26.18, |
|
"learning_rate": 2.8294388809410273e-06, |
|
"loss": 0.0001, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 26.39, |
|
"learning_rate": 2.6704816404387224e-06, |
|
"loss": 0.0, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 26.61, |
|
"learning_rate": 2.5115243999364175e-06, |
|
"loss": 0.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 26.82, |
|
"learning_rate": 2.3525671594341126e-06, |
|
"loss": 0.0, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_f1": 0.7929217495075929, |
|
"eval_loss": 1.879309892654419, |
|
"eval_runtime": 1.9514, |
|
"eval_samples_per_second": 297.228, |
|
"eval_steps_per_second": 1.025, |
|
"step": 6291 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"learning_rate": 2.1936099189318076e-06, |
|
"loss": 0.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 27.25, |
|
"learning_rate": 2.0346526784295027e-06, |
|
"loss": 0.0, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 27.47, |
|
"learning_rate": 1.8756954379271978e-06, |
|
"loss": 0.0111, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"learning_rate": 1.7167381974248929e-06, |
|
"loss": 0.0, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 27.9, |
|
"learning_rate": 1.557780956922588e-06, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_f1": 0.794029634093503, |
|
"eval_loss": 1.8904625177383423, |
|
"eval_runtime": 2.2478, |
|
"eval_samples_per_second": 258.035, |
|
"eval_steps_per_second": 0.89, |
|
"step": 6524 |
|
}, |
|
{ |
|
"epoch": 28.11, |
|
"learning_rate": 1.398823716420283e-06, |
|
"loss": 0.0, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"learning_rate": 1.2398664759179781e-06, |
|
"loss": 0.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 28.54, |
|
"learning_rate": 1.0809092354156734e-06, |
|
"loss": 0.0, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 28.76, |
|
"learning_rate": 9.219519949133683e-07, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 7.629947544110635e-07, |
|
"loss": 0.0, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_f1": 0.794029634093503, |
|
"eval_loss": 1.8970826864242554, |
|
"eval_runtime": 1.9492, |
|
"eval_samples_per_second": 297.551, |
|
"eval_steps_per_second": 1.026, |
|
"step": 6757 |
|
}, |
|
{ |
|
"epoch": 29.18, |
|
"learning_rate": 6.040375139087585e-07, |
|
"loss": 0.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 29.4, |
|
"learning_rate": 4.450802734064537e-07, |
|
"loss": 0.0, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 29.61, |
|
"learning_rate": 2.861230329041488e-07, |
|
"loss": 0.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"learning_rate": 1.2716579240184392e-07, |
|
"loss": 0.0002, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_f1": 0.7954091951908298, |
|
"eval_loss": 1.9001948833465576, |
|
"eval_runtime": 1.8428, |
|
"eval_samples_per_second": 314.746, |
|
"eval_steps_per_second": 1.085, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 6990, |
|
"total_flos": 5566168764425088.0, |
|
"train_loss": 0.16078996370909257, |
|
"train_runtime": 2045.6711, |
|
"train_samples_per_second": 54.496, |
|
"train_steps_per_second": 3.417 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 6990, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 5566168764425088.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|