{ "best_metric": 0.8123190611646329, "best_model_checkpoint": "output/roberta-large-question-classifier/checkpoint-2563", "epoch": 30.0, "eval_steps": 500, "global_step": 6990, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21, "learning_rate": 1.430615164520744e-06, "loss": 2.3372, "step": 50 }, { "epoch": 0.43, "learning_rate": 2.861230329041488e-06, "loss": 2.276, "step": 100 }, { "epoch": 0.64, "learning_rate": 4.291845493562232e-06, "loss": 2.1988, "step": 150 }, { "epoch": 0.86, "learning_rate": 5.722460658082976e-06, "loss": 1.9467, "step": 200 }, { "epoch": 1.0, "eval_f1": 0.4050404697492347, "eval_loss": 1.3099409341812134, "eval_runtime": 1.3906, "eval_samples_per_second": 417.1, "eval_steps_per_second": 1.438, "step": 233 }, { "epoch": 1.07, "learning_rate": 7.15307582260372e-06, "loss": 1.5551, "step": 250 }, { "epoch": 1.29, "learning_rate": 8.583690987124465e-06, "loss": 1.0537, "step": 300 }, { "epoch": 1.5, "learning_rate": 1.0014306151645208e-05, "loss": 0.872, "step": 350 }, { "epoch": 1.72, "learning_rate": 1.1444921316165953e-05, "loss": 0.6619, "step": 400 }, { "epoch": 1.93, "learning_rate": 1.2875536480686697e-05, "loss": 0.6381, "step": 450 }, { "epoch": 2.0, "eval_f1": 0.7785421184302428, "eval_loss": 0.5586220622062683, "eval_runtime": 1.4464, "eval_samples_per_second": 400.997, "eval_steps_per_second": 1.383, "step": 466 }, { "epoch": 2.15, "learning_rate": 1.430615164520744e-05, "loss": 0.509, "step": 500 }, { "epoch": 2.36, "learning_rate": 1.5736766809728185e-05, "loss": 0.5387, "step": 550 }, { "epoch": 2.58, "learning_rate": 1.716738197424893e-05, "loss": 0.5163, "step": 600 }, { "epoch": 2.79, "learning_rate": 1.859799713876967e-05, "loss": 0.628, "step": 650 }, { "epoch": 3.0, "eval_f1": 0.7831151120797589, "eval_loss": 0.6418800354003906, "eval_runtime": 1.467, "eval_samples_per_second": 395.356, "eval_steps_per_second": 1.363, "step": 699 }, { "epoch": 3.0, "learning_rate": 1.9996820855189955e-05, "loss": 0.5632, "step": 700 }, { "epoch": 3.22, "learning_rate": 1.983786361468765e-05, "loss": 0.4046, "step": 750 }, { "epoch": 3.43, "learning_rate": 1.9678906374185345e-05, "loss": 0.3985, "step": 800 }, { "epoch": 3.65, "learning_rate": 1.951994913368304e-05, "loss": 0.4307, "step": 850 }, { "epoch": 3.86, "learning_rate": 1.9360991893180737e-05, "loss": 0.4487, "step": 900 }, { "epoch": 4.0, "eval_f1": 0.8093842888236766, "eval_loss": 0.5770355463027954, "eval_runtime": 1.4647, "eval_samples_per_second": 395.985, "eval_steps_per_second": 1.365, "step": 932 }, { "epoch": 4.08, "learning_rate": 1.9202034652678432e-05, "loss": 0.3373, "step": 950 }, { "epoch": 4.29, "learning_rate": 1.9043077412176127e-05, "loss": 0.2578, "step": 1000 }, { "epoch": 4.51, "learning_rate": 1.888412017167382e-05, "loss": 0.2675, "step": 1050 }, { "epoch": 4.72, "learning_rate": 1.8725162931171516e-05, "loss": 0.2697, "step": 1100 }, { "epoch": 4.94, "learning_rate": 1.8566205690669214e-05, "loss": 0.3319, "step": 1150 }, { "epoch": 5.0, "eval_f1": 0.7952503005676876, "eval_loss": 0.7712982296943665, "eval_runtime": 1.5475, "eval_samples_per_second": 374.79, "eval_steps_per_second": 1.292, "step": 1165 }, { "epoch": 5.15, "learning_rate": 1.8407248450166905e-05, "loss": 0.2049, "step": 1200 }, { "epoch": 5.36, "learning_rate": 1.82482912096646e-05, "loss": 0.2344, "step": 1250 }, { "epoch": 5.58, "learning_rate": 1.8089333969162298e-05, "loss": 0.1843, "step": 1300 }, { "epoch": 5.79, "learning_rate": 1.7930376728659993e-05, "loss": 0.2095, "step": 1350 }, { "epoch": 6.0, "eval_f1": 0.8017807103839256, "eval_loss": 0.8798965811729431, "eval_runtime": 1.4572, "eval_samples_per_second": 398.025, "eval_steps_per_second": 1.372, "step": 1398 }, { "epoch": 6.01, "learning_rate": 1.7771419488157687e-05, "loss": 0.2039, "step": 1400 }, { "epoch": 6.22, "learning_rate": 1.7612462247655382e-05, "loss": 0.0876, "step": 1450 }, { "epoch": 6.44, "learning_rate": 1.7453505007153077e-05, "loss": 0.1054, "step": 1500 }, { "epoch": 6.65, "learning_rate": 1.7294547766650775e-05, "loss": 0.1629, "step": 1550 }, { "epoch": 6.87, "learning_rate": 1.7135590526148466e-05, "loss": 0.1355, "step": 1600 }, { "epoch": 7.0, "eval_f1": 0.7961224122154954, "eval_loss": 1.0646474361419678, "eval_runtime": 1.4774, "eval_samples_per_second": 392.581, "eval_steps_per_second": 1.354, "step": 1631 }, { "epoch": 7.08, "learning_rate": 1.6976633285646164e-05, "loss": 0.1457, "step": 1650 }, { "epoch": 7.3, "learning_rate": 1.681767604514386e-05, "loss": 0.0861, "step": 1700 }, { "epoch": 7.51, "learning_rate": 1.6658718804641553e-05, "loss": 0.0852, "step": 1750 }, { "epoch": 7.73, "learning_rate": 1.6499761564139248e-05, "loss": 0.1283, "step": 1800 }, { "epoch": 7.94, "learning_rate": 1.6340804323636943e-05, "loss": 0.0956, "step": 1850 }, { "epoch": 8.0, "eval_f1": 0.7998718228606326, "eval_loss": 1.2174800634384155, "eval_runtime": 1.5358, "eval_samples_per_second": 377.658, "eval_steps_per_second": 1.302, "step": 1864 }, { "epoch": 8.15, "learning_rate": 1.6181847083134637e-05, "loss": 0.0862, "step": 1900 }, { "epoch": 8.37, "learning_rate": 1.6022889842632335e-05, "loss": 0.0486, "step": 1950 }, { "epoch": 8.58, "learning_rate": 1.5863932602130026e-05, "loss": 0.0321, "step": 2000 }, { "epoch": 8.8, "learning_rate": 1.5704975361627725e-05, "loss": 0.0687, "step": 2050 }, { "epoch": 9.0, "eval_f1": 0.789186529273271, "eval_loss": 1.3646652698516846, "eval_runtime": 1.5089, "eval_samples_per_second": 384.398, "eval_steps_per_second": 1.326, "step": 2097 }, { "epoch": 9.01, "learning_rate": 1.554601812112542e-05, "loss": 0.0522, "step": 2100 }, { "epoch": 9.23, "learning_rate": 1.5387060880623114e-05, "loss": 0.0349, "step": 2150 }, { "epoch": 9.44, "learning_rate": 1.5228103640120809e-05, "loss": 0.0529, "step": 2200 }, { "epoch": 9.66, "learning_rate": 1.5069146399618503e-05, "loss": 0.0284, "step": 2250 }, { "epoch": 9.87, "learning_rate": 1.49101891591162e-05, "loss": 0.0371, "step": 2300 }, { "epoch": 10.0, "eval_f1": 0.7986917021269787, "eval_loss": 1.3809223175048828, "eval_runtime": 1.6909, "eval_samples_per_second": 343.007, "eval_steps_per_second": 1.183, "step": 2330 }, { "epoch": 10.09, "learning_rate": 1.4751231918613892e-05, "loss": 0.0143, "step": 2350 }, { "epoch": 10.3, "learning_rate": 1.4592274678111589e-05, "loss": 0.0012, "step": 2400 }, { "epoch": 10.52, "learning_rate": 1.4433317437609285e-05, "loss": 0.0117, "step": 2450 }, { "epoch": 10.73, "learning_rate": 1.427436019710698e-05, "loss": 0.0248, "step": 2500 }, { "epoch": 10.94, "learning_rate": 1.4115402956604673e-05, "loss": 0.0303, "step": 2550 }, { "epoch": 11.0, "eval_f1": 0.8123190611646329, "eval_loss": 1.3591104745864868, "eval_runtime": 1.57, "eval_samples_per_second": 369.434, "eval_steps_per_second": 1.274, "step": 2563 }, { "epoch": 11.16, "learning_rate": 1.395644571610237e-05, "loss": 0.0142, "step": 2600 }, { "epoch": 11.37, "learning_rate": 1.3797488475600066e-05, "loss": 0.0136, "step": 2650 }, { "epoch": 11.59, "learning_rate": 1.363853123509776e-05, "loss": 0.0126, "step": 2700 }, { "epoch": 11.8, "learning_rate": 1.3479573994595455e-05, "loss": 0.0263, "step": 2750 }, { "epoch": 12.0, "eval_f1": 0.8100291935535177, "eval_loss": 1.5316766500473022, "eval_runtime": 1.5184, "eval_samples_per_second": 381.982, "eval_steps_per_second": 1.317, "step": 2796 }, { "epoch": 12.02, "learning_rate": 1.332061675409315e-05, "loss": 0.011, "step": 2800 }, { "epoch": 12.23, "learning_rate": 1.3161659513590846e-05, "loss": 0.0002, "step": 2850 }, { "epoch": 12.45, "learning_rate": 1.300270227308854e-05, "loss": 0.0057, "step": 2900 }, { "epoch": 12.66, "learning_rate": 1.2843745032586235e-05, "loss": 0.0016, "step": 2950 }, { "epoch": 12.88, "learning_rate": 1.268478779208393e-05, "loss": 0.0144, "step": 3000 }, { "epoch": 13.0, "eval_f1": 0.7959241618420011, "eval_loss": 1.5725551843643188, "eval_runtime": 1.4849, "eval_samples_per_second": 390.601, "eval_steps_per_second": 1.347, "step": 3029 }, { "epoch": 13.09, "learning_rate": 1.2525830551581626e-05, "loss": 0.006, "step": 3050 }, { "epoch": 13.3, "learning_rate": 1.236687331107932e-05, "loss": 0.0056, "step": 3100 }, { "epoch": 13.52, "learning_rate": 1.2207916070577015e-05, "loss": 0.0114, "step": 3150 }, { "epoch": 13.73, "learning_rate": 1.204895883007471e-05, "loss": 0.021, "step": 3200 }, { "epoch": 13.95, "learning_rate": 1.1890001589572406e-05, "loss": 0.0436, "step": 3250 }, { "epoch": 14.0, "eval_f1": 0.7987626313618129, "eval_loss": 1.6159876585006714, "eval_runtime": 1.4555, "eval_samples_per_second": 398.497, "eval_steps_per_second": 1.374, "step": 3262 }, { "epoch": 14.16, "learning_rate": 1.1731044349070103e-05, "loss": 0.0002, "step": 3300 }, { "epoch": 14.38, "learning_rate": 1.1572087108567796e-05, "loss": 0.0062, "step": 3350 }, { "epoch": 14.59, "learning_rate": 1.141312986806549e-05, "loss": 0.0056, "step": 3400 }, { "epoch": 14.81, "learning_rate": 1.1254172627563187e-05, "loss": 0.0048, "step": 3450 }, { "epoch": 15.0, "eval_f1": 0.7957479636902922, "eval_loss": 1.6826026439666748, "eval_runtime": 1.4617, "eval_samples_per_second": 396.789, "eval_steps_per_second": 1.368, "step": 3495 }, { "epoch": 15.02, "learning_rate": 1.1095215387060883e-05, "loss": 0.0039, "step": 3500 }, { "epoch": 15.24, "learning_rate": 1.0936258146558576e-05, "loss": 0.0001, "step": 3550 }, { "epoch": 15.45, "learning_rate": 1.0777300906056272e-05, "loss": 0.0236, "step": 3600 }, { "epoch": 15.67, "learning_rate": 1.0618343665553967e-05, "loss": 0.0004, "step": 3650 }, { "epoch": 15.88, "learning_rate": 1.0459386425051663e-05, "loss": 0.0001, "step": 3700 }, { "epoch": 16.0, "eval_f1": 0.7956639409293647, "eval_loss": 1.6912556886672974, "eval_runtime": 1.4563, "eval_samples_per_second": 398.278, "eval_steps_per_second": 1.373, "step": 3728 }, { "epoch": 16.09, "learning_rate": 1.0300429184549356e-05, "loss": 0.0002, "step": 3750 }, { "epoch": 16.31, "learning_rate": 1.0141471944047053e-05, "loss": 0.0002, "step": 3800 }, { "epoch": 16.52, "learning_rate": 9.982514703544747e-06, "loss": 0.0006, "step": 3850 }, { "epoch": 16.74, "learning_rate": 9.823557463042442e-06, "loss": 0.0002, "step": 3900 }, { "epoch": 16.95, "learning_rate": 9.664600222540137e-06, "loss": 0.0001, "step": 3950 }, { "epoch": 17.0, "eval_f1": 0.7994751240525658, "eval_loss": 1.7075979709625244, "eval_runtime": 1.4886, "eval_samples_per_second": 389.634, "eval_steps_per_second": 1.344, "step": 3961 }, { "epoch": 17.17, "learning_rate": 9.505642982037833e-06, "loss": 0.0002, "step": 4000 }, { "epoch": 17.38, "learning_rate": 9.346685741535528e-06, "loss": 0.0185, "step": 4050 }, { "epoch": 17.6, "learning_rate": 9.187728501033222e-06, "loss": 0.0001, "step": 4100 }, { "epoch": 17.81, "learning_rate": 9.028771260530917e-06, "loss": 0.0034, "step": 4150 }, { "epoch": 18.0, "eval_f1": 0.7960354805040918, "eval_loss": 1.8018221855163574, "eval_runtime": 1.5408, "eval_samples_per_second": 376.422, "eval_steps_per_second": 1.298, "step": 4194 }, { "epoch": 18.03, "learning_rate": 8.869814020028613e-06, "loss": 0.013, "step": 4200 }, { "epoch": 18.24, "learning_rate": 8.710856779526308e-06, "loss": 0.0003, "step": 4250 }, { "epoch": 18.45, "learning_rate": 8.551899539024003e-06, "loss": 0.0001, "step": 4300 }, { "epoch": 18.67, "learning_rate": 8.392942298521697e-06, "loss": 0.0002, "step": 4350 }, { "epoch": 18.88, "learning_rate": 8.233985058019394e-06, "loss": 0.0228, "step": 4400 }, { "epoch": 19.0, "eval_f1": 0.7915974698658704, "eval_loss": 1.7456856966018677, "eval_runtime": 1.4762, "eval_samples_per_second": 392.912, "eval_steps_per_second": 1.355, "step": 4427 }, { "epoch": 19.1, "learning_rate": 8.075027817517088e-06, "loss": 0.0006, "step": 4450 }, { "epoch": 19.31, "learning_rate": 7.916070577014783e-06, "loss": 0.0037, "step": 4500 }, { "epoch": 19.53, "learning_rate": 7.757113336512478e-06, "loss": 0.0314, "step": 4550 }, { "epoch": 19.74, "learning_rate": 7.598156096010174e-06, "loss": 0.0028, "step": 4600 }, { "epoch": 19.96, "learning_rate": 7.439198855507869e-06, "loss": 0.0083, "step": 4650 }, { "epoch": 20.0, "eval_f1": 0.7868576028090374, "eval_loss": 1.9279075860977173, "eval_runtime": 1.4679, "eval_samples_per_second": 395.119, "eval_steps_per_second": 1.362, "step": 4660 }, { "epoch": 20.17, "learning_rate": 7.280241615005564e-06, "loss": 0.0009, "step": 4700 }, { "epoch": 20.39, "learning_rate": 7.121284374503259e-06, "loss": 0.0002, "step": 4750 }, { "epoch": 20.6, "learning_rate": 6.962327134000954e-06, "loss": 0.0082, "step": 4800 }, { "epoch": 20.82, "learning_rate": 6.803369893498649e-06, "loss": 0.0001, "step": 4850 }, { "epoch": 21.0, "eval_f1": 0.7915377946685866, "eval_loss": 1.8367053270339966, "eval_runtime": 2.0999, "eval_samples_per_second": 276.201, "eval_steps_per_second": 0.952, "step": 4893 }, { "epoch": 21.03, "learning_rate": 6.6444126529963445e-06, "loss": 0.0001, "step": 4900 }, { "epoch": 21.24, "learning_rate": 6.485455412494039e-06, "loss": 0.0072, "step": 4950 }, { "epoch": 21.46, "learning_rate": 6.326498171991735e-06, "loss": 0.0, "step": 5000 }, { "epoch": 21.67, "learning_rate": 6.167540931489429e-06, "loss": 0.0, "step": 5050 }, { "epoch": 21.89, "learning_rate": 6.008583690987126e-06, "loss": 0.0003, "step": 5100 }, { "epoch": 22.0, "eval_f1": 0.7842117575951872, "eval_loss": 1.8620420694351196, "eval_runtime": 1.8603, "eval_samples_per_second": 311.785, "eval_steps_per_second": 1.075, "step": 5126 }, { "epoch": 22.1, "learning_rate": 5.8496264504848195e-06, "loss": 0.0007, "step": 5150 }, { "epoch": 22.32, "learning_rate": 5.690669209982516e-06, "loss": 0.0, "step": 5200 }, { "epoch": 22.53, "learning_rate": 5.5317119694802105e-06, "loss": 0.0021, "step": 5250 }, { "epoch": 22.75, "learning_rate": 5.372754728977906e-06, "loss": 0.0077, "step": 5300 }, { "epoch": 22.96, "learning_rate": 5.213797488475601e-06, "loss": 0.0002, "step": 5350 }, { "epoch": 23.0, "eval_f1": 0.7828476594276503, "eval_loss": 1.919188141822815, "eval_runtime": 1.4859, "eval_samples_per_second": 390.344, "eval_steps_per_second": 1.346, "step": 5359 }, { "epoch": 23.18, "learning_rate": 5.054840247973296e-06, "loss": 0.0194, "step": 5400 }, { "epoch": 23.39, "learning_rate": 4.895883007470991e-06, "loss": 0.0132, "step": 5450 }, { "epoch": 23.61, "learning_rate": 4.7369257669686855e-06, "loss": 0.0001, "step": 5500 }, { "epoch": 23.82, "learning_rate": 4.577968526466381e-06, "loss": 0.0, "step": 5550 }, { "epoch": 24.0, "eval_f1": 0.7927310235612234, "eval_loss": 1.9081404209136963, "eval_runtime": 1.4831, "eval_samples_per_second": 391.082, "eval_steps_per_second": 1.349, "step": 5592 }, { "epoch": 24.03, "learning_rate": 4.419011285964076e-06, "loss": 0.0, "step": 5600 }, { "epoch": 24.25, "learning_rate": 4.260054045461771e-06, "loss": 0.0001, "step": 5650 }, { "epoch": 24.46, "learning_rate": 4.101096804959467e-06, "loss": 0.0122, "step": 5700 }, { "epoch": 24.68, "learning_rate": 3.942139564457161e-06, "loss": 0.0, "step": 5750 }, { "epoch": 24.89, "learning_rate": 3.7831823239548564e-06, "loss": 0.0003, "step": 5800 }, { "epoch": 25.0, "eval_f1": 0.7812550199347442, "eval_loss": 1.9822450876235962, "eval_runtime": 1.5174, "eval_samples_per_second": 382.228, "eval_steps_per_second": 1.318, "step": 5825 }, { "epoch": 25.11, "learning_rate": 3.6242250834525515e-06, "loss": 0.0116, "step": 5850 }, { "epoch": 25.32, "learning_rate": 3.4652678429502466e-06, "loss": 0.0, "step": 5900 }, { "epoch": 25.54, "learning_rate": 3.306310602447942e-06, "loss": 0.0004, "step": 5950 }, { "epoch": 25.75, "learning_rate": 3.147353361945637e-06, "loss": 0.0027, "step": 6000 }, { "epoch": 25.97, "learning_rate": 2.9883961214433322e-06, "loss": 0.0059, "step": 6050 }, { "epoch": 26.0, "eval_f1": 0.7953953204096383, "eval_loss": 1.8736791610717773, "eval_runtime": 1.4646, "eval_samples_per_second": 396.003, "eval_steps_per_second": 1.366, "step": 6058 }, { "epoch": 26.18, "learning_rate": 2.8294388809410273e-06, "loss": 0.0001, "step": 6100 }, { "epoch": 26.39, "learning_rate": 2.6704816404387224e-06, "loss": 0.0, "step": 6150 }, { "epoch": 26.61, "learning_rate": 2.5115243999364175e-06, "loss": 0.0, "step": 6200 }, { "epoch": 26.82, "learning_rate": 2.3525671594341126e-06, "loss": 0.0, "step": 6250 }, { "epoch": 27.0, "eval_f1": 0.7929217495075929, "eval_loss": 1.879309892654419, "eval_runtime": 1.9514, "eval_samples_per_second": 297.228, "eval_steps_per_second": 1.025, "step": 6291 }, { "epoch": 27.04, "learning_rate": 2.1936099189318076e-06, "loss": 0.0, "step": 6300 }, { "epoch": 27.25, "learning_rate": 2.0346526784295027e-06, "loss": 0.0, "step": 6350 }, { "epoch": 27.47, "learning_rate": 1.8756954379271978e-06, "loss": 0.0111, "step": 6400 }, { "epoch": 27.68, "learning_rate": 1.7167381974248929e-06, "loss": 0.0, "step": 6450 }, { "epoch": 27.9, "learning_rate": 1.557780956922588e-06, "loss": 0.0, "step": 6500 }, { "epoch": 28.0, "eval_f1": 0.794029634093503, "eval_loss": 1.8904625177383423, "eval_runtime": 2.2478, "eval_samples_per_second": 258.035, "eval_steps_per_second": 0.89, "step": 6524 }, { "epoch": 28.11, "learning_rate": 1.398823716420283e-06, "loss": 0.0, "step": 6550 }, { "epoch": 28.33, "learning_rate": 1.2398664759179781e-06, "loss": 0.0, "step": 6600 }, { "epoch": 28.54, "learning_rate": 1.0809092354156734e-06, "loss": 0.0, "step": 6650 }, { "epoch": 28.76, "learning_rate": 9.219519949133683e-07, "loss": 0.0, "step": 6700 }, { "epoch": 28.97, "learning_rate": 7.629947544110635e-07, "loss": 0.0, "step": 6750 }, { "epoch": 29.0, "eval_f1": 0.794029634093503, "eval_loss": 1.8970826864242554, "eval_runtime": 1.9492, "eval_samples_per_second": 297.551, "eval_steps_per_second": 1.026, "step": 6757 }, { "epoch": 29.18, "learning_rate": 6.040375139087585e-07, "loss": 0.0, "step": 6800 }, { "epoch": 29.4, "learning_rate": 4.450802734064537e-07, "loss": 0.0, "step": 6850 }, { "epoch": 29.61, "learning_rate": 2.861230329041488e-07, "loss": 0.0, "step": 6900 }, { "epoch": 29.83, "learning_rate": 1.2716579240184392e-07, "loss": 0.0002, "step": 6950 }, { "epoch": 30.0, "eval_f1": 0.7954091951908298, "eval_loss": 1.9001948833465576, "eval_runtime": 1.8428, "eval_samples_per_second": 314.746, "eval_steps_per_second": 1.085, "step": 6990 }, { "epoch": 30.0, "step": 6990, "total_flos": 5566168764425088.0, "train_loss": 0.16078996370909257, "train_runtime": 2045.6711, "train_samples_per_second": 54.496, "train_steps_per_second": 3.417 } ], "logging_steps": 50, "max_steps": 6990, "num_train_epochs": 30, "save_steps": 500, "total_flos": 5566168764425088.0, "trial_name": null, "trial_params": null }