roberta-large-question-classifier / trainer_state.json
jantrienes's picture
End of training
e8c0878
{
"best_metric": 0.8123190611646329,
"best_model_checkpoint": "output/roberta-large-question-classifier/checkpoint-2563",
"epoch": 30.0,
"eval_steps": 500,
"global_step": 6990,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.21,
"learning_rate": 1.430615164520744e-06,
"loss": 2.3372,
"step": 50
},
{
"epoch": 0.43,
"learning_rate": 2.861230329041488e-06,
"loss": 2.276,
"step": 100
},
{
"epoch": 0.64,
"learning_rate": 4.291845493562232e-06,
"loss": 2.1988,
"step": 150
},
{
"epoch": 0.86,
"learning_rate": 5.722460658082976e-06,
"loss": 1.9467,
"step": 200
},
{
"epoch": 1.0,
"eval_f1": 0.4050404697492347,
"eval_loss": 1.3099409341812134,
"eval_runtime": 1.3906,
"eval_samples_per_second": 417.1,
"eval_steps_per_second": 1.438,
"step": 233
},
{
"epoch": 1.07,
"learning_rate": 7.15307582260372e-06,
"loss": 1.5551,
"step": 250
},
{
"epoch": 1.29,
"learning_rate": 8.583690987124465e-06,
"loss": 1.0537,
"step": 300
},
{
"epoch": 1.5,
"learning_rate": 1.0014306151645208e-05,
"loss": 0.872,
"step": 350
},
{
"epoch": 1.72,
"learning_rate": 1.1444921316165953e-05,
"loss": 0.6619,
"step": 400
},
{
"epoch": 1.93,
"learning_rate": 1.2875536480686697e-05,
"loss": 0.6381,
"step": 450
},
{
"epoch": 2.0,
"eval_f1": 0.7785421184302428,
"eval_loss": 0.5586220622062683,
"eval_runtime": 1.4464,
"eval_samples_per_second": 400.997,
"eval_steps_per_second": 1.383,
"step": 466
},
{
"epoch": 2.15,
"learning_rate": 1.430615164520744e-05,
"loss": 0.509,
"step": 500
},
{
"epoch": 2.36,
"learning_rate": 1.5736766809728185e-05,
"loss": 0.5387,
"step": 550
},
{
"epoch": 2.58,
"learning_rate": 1.716738197424893e-05,
"loss": 0.5163,
"step": 600
},
{
"epoch": 2.79,
"learning_rate": 1.859799713876967e-05,
"loss": 0.628,
"step": 650
},
{
"epoch": 3.0,
"eval_f1": 0.7831151120797589,
"eval_loss": 0.6418800354003906,
"eval_runtime": 1.467,
"eval_samples_per_second": 395.356,
"eval_steps_per_second": 1.363,
"step": 699
},
{
"epoch": 3.0,
"learning_rate": 1.9996820855189955e-05,
"loss": 0.5632,
"step": 700
},
{
"epoch": 3.22,
"learning_rate": 1.983786361468765e-05,
"loss": 0.4046,
"step": 750
},
{
"epoch": 3.43,
"learning_rate": 1.9678906374185345e-05,
"loss": 0.3985,
"step": 800
},
{
"epoch": 3.65,
"learning_rate": 1.951994913368304e-05,
"loss": 0.4307,
"step": 850
},
{
"epoch": 3.86,
"learning_rate": 1.9360991893180737e-05,
"loss": 0.4487,
"step": 900
},
{
"epoch": 4.0,
"eval_f1": 0.8093842888236766,
"eval_loss": 0.5770355463027954,
"eval_runtime": 1.4647,
"eval_samples_per_second": 395.985,
"eval_steps_per_second": 1.365,
"step": 932
},
{
"epoch": 4.08,
"learning_rate": 1.9202034652678432e-05,
"loss": 0.3373,
"step": 950
},
{
"epoch": 4.29,
"learning_rate": 1.9043077412176127e-05,
"loss": 0.2578,
"step": 1000
},
{
"epoch": 4.51,
"learning_rate": 1.888412017167382e-05,
"loss": 0.2675,
"step": 1050
},
{
"epoch": 4.72,
"learning_rate": 1.8725162931171516e-05,
"loss": 0.2697,
"step": 1100
},
{
"epoch": 4.94,
"learning_rate": 1.8566205690669214e-05,
"loss": 0.3319,
"step": 1150
},
{
"epoch": 5.0,
"eval_f1": 0.7952503005676876,
"eval_loss": 0.7712982296943665,
"eval_runtime": 1.5475,
"eval_samples_per_second": 374.79,
"eval_steps_per_second": 1.292,
"step": 1165
},
{
"epoch": 5.15,
"learning_rate": 1.8407248450166905e-05,
"loss": 0.2049,
"step": 1200
},
{
"epoch": 5.36,
"learning_rate": 1.82482912096646e-05,
"loss": 0.2344,
"step": 1250
},
{
"epoch": 5.58,
"learning_rate": 1.8089333969162298e-05,
"loss": 0.1843,
"step": 1300
},
{
"epoch": 5.79,
"learning_rate": 1.7930376728659993e-05,
"loss": 0.2095,
"step": 1350
},
{
"epoch": 6.0,
"eval_f1": 0.8017807103839256,
"eval_loss": 0.8798965811729431,
"eval_runtime": 1.4572,
"eval_samples_per_second": 398.025,
"eval_steps_per_second": 1.372,
"step": 1398
},
{
"epoch": 6.01,
"learning_rate": 1.7771419488157687e-05,
"loss": 0.2039,
"step": 1400
},
{
"epoch": 6.22,
"learning_rate": 1.7612462247655382e-05,
"loss": 0.0876,
"step": 1450
},
{
"epoch": 6.44,
"learning_rate": 1.7453505007153077e-05,
"loss": 0.1054,
"step": 1500
},
{
"epoch": 6.65,
"learning_rate": 1.7294547766650775e-05,
"loss": 0.1629,
"step": 1550
},
{
"epoch": 6.87,
"learning_rate": 1.7135590526148466e-05,
"loss": 0.1355,
"step": 1600
},
{
"epoch": 7.0,
"eval_f1": 0.7961224122154954,
"eval_loss": 1.0646474361419678,
"eval_runtime": 1.4774,
"eval_samples_per_second": 392.581,
"eval_steps_per_second": 1.354,
"step": 1631
},
{
"epoch": 7.08,
"learning_rate": 1.6976633285646164e-05,
"loss": 0.1457,
"step": 1650
},
{
"epoch": 7.3,
"learning_rate": 1.681767604514386e-05,
"loss": 0.0861,
"step": 1700
},
{
"epoch": 7.51,
"learning_rate": 1.6658718804641553e-05,
"loss": 0.0852,
"step": 1750
},
{
"epoch": 7.73,
"learning_rate": 1.6499761564139248e-05,
"loss": 0.1283,
"step": 1800
},
{
"epoch": 7.94,
"learning_rate": 1.6340804323636943e-05,
"loss": 0.0956,
"step": 1850
},
{
"epoch": 8.0,
"eval_f1": 0.7998718228606326,
"eval_loss": 1.2174800634384155,
"eval_runtime": 1.5358,
"eval_samples_per_second": 377.658,
"eval_steps_per_second": 1.302,
"step": 1864
},
{
"epoch": 8.15,
"learning_rate": 1.6181847083134637e-05,
"loss": 0.0862,
"step": 1900
},
{
"epoch": 8.37,
"learning_rate": 1.6022889842632335e-05,
"loss": 0.0486,
"step": 1950
},
{
"epoch": 8.58,
"learning_rate": 1.5863932602130026e-05,
"loss": 0.0321,
"step": 2000
},
{
"epoch": 8.8,
"learning_rate": 1.5704975361627725e-05,
"loss": 0.0687,
"step": 2050
},
{
"epoch": 9.0,
"eval_f1": 0.789186529273271,
"eval_loss": 1.3646652698516846,
"eval_runtime": 1.5089,
"eval_samples_per_second": 384.398,
"eval_steps_per_second": 1.326,
"step": 2097
},
{
"epoch": 9.01,
"learning_rate": 1.554601812112542e-05,
"loss": 0.0522,
"step": 2100
},
{
"epoch": 9.23,
"learning_rate": 1.5387060880623114e-05,
"loss": 0.0349,
"step": 2150
},
{
"epoch": 9.44,
"learning_rate": 1.5228103640120809e-05,
"loss": 0.0529,
"step": 2200
},
{
"epoch": 9.66,
"learning_rate": 1.5069146399618503e-05,
"loss": 0.0284,
"step": 2250
},
{
"epoch": 9.87,
"learning_rate": 1.49101891591162e-05,
"loss": 0.0371,
"step": 2300
},
{
"epoch": 10.0,
"eval_f1": 0.7986917021269787,
"eval_loss": 1.3809223175048828,
"eval_runtime": 1.6909,
"eval_samples_per_second": 343.007,
"eval_steps_per_second": 1.183,
"step": 2330
},
{
"epoch": 10.09,
"learning_rate": 1.4751231918613892e-05,
"loss": 0.0143,
"step": 2350
},
{
"epoch": 10.3,
"learning_rate": 1.4592274678111589e-05,
"loss": 0.0012,
"step": 2400
},
{
"epoch": 10.52,
"learning_rate": 1.4433317437609285e-05,
"loss": 0.0117,
"step": 2450
},
{
"epoch": 10.73,
"learning_rate": 1.427436019710698e-05,
"loss": 0.0248,
"step": 2500
},
{
"epoch": 10.94,
"learning_rate": 1.4115402956604673e-05,
"loss": 0.0303,
"step": 2550
},
{
"epoch": 11.0,
"eval_f1": 0.8123190611646329,
"eval_loss": 1.3591104745864868,
"eval_runtime": 1.57,
"eval_samples_per_second": 369.434,
"eval_steps_per_second": 1.274,
"step": 2563
},
{
"epoch": 11.16,
"learning_rate": 1.395644571610237e-05,
"loss": 0.0142,
"step": 2600
},
{
"epoch": 11.37,
"learning_rate": 1.3797488475600066e-05,
"loss": 0.0136,
"step": 2650
},
{
"epoch": 11.59,
"learning_rate": 1.363853123509776e-05,
"loss": 0.0126,
"step": 2700
},
{
"epoch": 11.8,
"learning_rate": 1.3479573994595455e-05,
"loss": 0.0263,
"step": 2750
},
{
"epoch": 12.0,
"eval_f1": 0.8100291935535177,
"eval_loss": 1.5316766500473022,
"eval_runtime": 1.5184,
"eval_samples_per_second": 381.982,
"eval_steps_per_second": 1.317,
"step": 2796
},
{
"epoch": 12.02,
"learning_rate": 1.332061675409315e-05,
"loss": 0.011,
"step": 2800
},
{
"epoch": 12.23,
"learning_rate": 1.3161659513590846e-05,
"loss": 0.0002,
"step": 2850
},
{
"epoch": 12.45,
"learning_rate": 1.300270227308854e-05,
"loss": 0.0057,
"step": 2900
},
{
"epoch": 12.66,
"learning_rate": 1.2843745032586235e-05,
"loss": 0.0016,
"step": 2950
},
{
"epoch": 12.88,
"learning_rate": 1.268478779208393e-05,
"loss": 0.0144,
"step": 3000
},
{
"epoch": 13.0,
"eval_f1": 0.7959241618420011,
"eval_loss": 1.5725551843643188,
"eval_runtime": 1.4849,
"eval_samples_per_second": 390.601,
"eval_steps_per_second": 1.347,
"step": 3029
},
{
"epoch": 13.09,
"learning_rate": 1.2525830551581626e-05,
"loss": 0.006,
"step": 3050
},
{
"epoch": 13.3,
"learning_rate": 1.236687331107932e-05,
"loss": 0.0056,
"step": 3100
},
{
"epoch": 13.52,
"learning_rate": 1.2207916070577015e-05,
"loss": 0.0114,
"step": 3150
},
{
"epoch": 13.73,
"learning_rate": 1.204895883007471e-05,
"loss": 0.021,
"step": 3200
},
{
"epoch": 13.95,
"learning_rate": 1.1890001589572406e-05,
"loss": 0.0436,
"step": 3250
},
{
"epoch": 14.0,
"eval_f1": 0.7987626313618129,
"eval_loss": 1.6159876585006714,
"eval_runtime": 1.4555,
"eval_samples_per_second": 398.497,
"eval_steps_per_second": 1.374,
"step": 3262
},
{
"epoch": 14.16,
"learning_rate": 1.1731044349070103e-05,
"loss": 0.0002,
"step": 3300
},
{
"epoch": 14.38,
"learning_rate": 1.1572087108567796e-05,
"loss": 0.0062,
"step": 3350
},
{
"epoch": 14.59,
"learning_rate": 1.141312986806549e-05,
"loss": 0.0056,
"step": 3400
},
{
"epoch": 14.81,
"learning_rate": 1.1254172627563187e-05,
"loss": 0.0048,
"step": 3450
},
{
"epoch": 15.0,
"eval_f1": 0.7957479636902922,
"eval_loss": 1.6826026439666748,
"eval_runtime": 1.4617,
"eval_samples_per_second": 396.789,
"eval_steps_per_second": 1.368,
"step": 3495
},
{
"epoch": 15.02,
"learning_rate": 1.1095215387060883e-05,
"loss": 0.0039,
"step": 3500
},
{
"epoch": 15.24,
"learning_rate": 1.0936258146558576e-05,
"loss": 0.0001,
"step": 3550
},
{
"epoch": 15.45,
"learning_rate": 1.0777300906056272e-05,
"loss": 0.0236,
"step": 3600
},
{
"epoch": 15.67,
"learning_rate": 1.0618343665553967e-05,
"loss": 0.0004,
"step": 3650
},
{
"epoch": 15.88,
"learning_rate": 1.0459386425051663e-05,
"loss": 0.0001,
"step": 3700
},
{
"epoch": 16.0,
"eval_f1": 0.7956639409293647,
"eval_loss": 1.6912556886672974,
"eval_runtime": 1.4563,
"eval_samples_per_second": 398.278,
"eval_steps_per_second": 1.373,
"step": 3728
},
{
"epoch": 16.09,
"learning_rate": 1.0300429184549356e-05,
"loss": 0.0002,
"step": 3750
},
{
"epoch": 16.31,
"learning_rate": 1.0141471944047053e-05,
"loss": 0.0002,
"step": 3800
},
{
"epoch": 16.52,
"learning_rate": 9.982514703544747e-06,
"loss": 0.0006,
"step": 3850
},
{
"epoch": 16.74,
"learning_rate": 9.823557463042442e-06,
"loss": 0.0002,
"step": 3900
},
{
"epoch": 16.95,
"learning_rate": 9.664600222540137e-06,
"loss": 0.0001,
"step": 3950
},
{
"epoch": 17.0,
"eval_f1": 0.7994751240525658,
"eval_loss": 1.7075979709625244,
"eval_runtime": 1.4886,
"eval_samples_per_second": 389.634,
"eval_steps_per_second": 1.344,
"step": 3961
},
{
"epoch": 17.17,
"learning_rate": 9.505642982037833e-06,
"loss": 0.0002,
"step": 4000
},
{
"epoch": 17.38,
"learning_rate": 9.346685741535528e-06,
"loss": 0.0185,
"step": 4050
},
{
"epoch": 17.6,
"learning_rate": 9.187728501033222e-06,
"loss": 0.0001,
"step": 4100
},
{
"epoch": 17.81,
"learning_rate": 9.028771260530917e-06,
"loss": 0.0034,
"step": 4150
},
{
"epoch": 18.0,
"eval_f1": 0.7960354805040918,
"eval_loss": 1.8018221855163574,
"eval_runtime": 1.5408,
"eval_samples_per_second": 376.422,
"eval_steps_per_second": 1.298,
"step": 4194
},
{
"epoch": 18.03,
"learning_rate": 8.869814020028613e-06,
"loss": 0.013,
"step": 4200
},
{
"epoch": 18.24,
"learning_rate": 8.710856779526308e-06,
"loss": 0.0003,
"step": 4250
},
{
"epoch": 18.45,
"learning_rate": 8.551899539024003e-06,
"loss": 0.0001,
"step": 4300
},
{
"epoch": 18.67,
"learning_rate": 8.392942298521697e-06,
"loss": 0.0002,
"step": 4350
},
{
"epoch": 18.88,
"learning_rate": 8.233985058019394e-06,
"loss": 0.0228,
"step": 4400
},
{
"epoch": 19.0,
"eval_f1": 0.7915974698658704,
"eval_loss": 1.7456856966018677,
"eval_runtime": 1.4762,
"eval_samples_per_second": 392.912,
"eval_steps_per_second": 1.355,
"step": 4427
},
{
"epoch": 19.1,
"learning_rate": 8.075027817517088e-06,
"loss": 0.0006,
"step": 4450
},
{
"epoch": 19.31,
"learning_rate": 7.916070577014783e-06,
"loss": 0.0037,
"step": 4500
},
{
"epoch": 19.53,
"learning_rate": 7.757113336512478e-06,
"loss": 0.0314,
"step": 4550
},
{
"epoch": 19.74,
"learning_rate": 7.598156096010174e-06,
"loss": 0.0028,
"step": 4600
},
{
"epoch": 19.96,
"learning_rate": 7.439198855507869e-06,
"loss": 0.0083,
"step": 4650
},
{
"epoch": 20.0,
"eval_f1": 0.7868576028090374,
"eval_loss": 1.9279075860977173,
"eval_runtime": 1.4679,
"eval_samples_per_second": 395.119,
"eval_steps_per_second": 1.362,
"step": 4660
},
{
"epoch": 20.17,
"learning_rate": 7.280241615005564e-06,
"loss": 0.0009,
"step": 4700
},
{
"epoch": 20.39,
"learning_rate": 7.121284374503259e-06,
"loss": 0.0002,
"step": 4750
},
{
"epoch": 20.6,
"learning_rate": 6.962327134000954e-06,
"loss": 0.0082,
"step": 4800
},
{
"epoch": 20.82,
"learning_rate": 6.803369893498649e-06,
"loss": 0.0001,
"step": 4850
},
{
"epoch": 21.0,
"eval_f1": 0.7915377946685866,
"eval_loss": 1.8367053270339966,
"eval_runtime": 2.0999,
"eval_samples_per_second": 276.201,
"eval_steps_per_second": 0.952,
"step": 4893
},
{
"epoch": 21.03,
"learning_rate": 6.6444126529963445e-06,
"loss": 0.0001,
"step": 4900
},
{
"epoch": 21.24,
"learning_rate": 6.485455412494039e-06,
"loss": 0.0072,
"step": 4950
},
{
"epoch": 21.46,
"learning_rate": 6.326498171991735e-06,
"loss": 0.0,
"step": 5000
},
{
"epoch": 21.67,
"learning_rate": 6.167540931489429e-06,
"loss": 0.0,
"step": 5050
},
{
"epoch": 21.89,
"learning_rate": 6.008583690987126e-06,
"loss": 0.0003,
"step": 5100
},
{
"epoch": 22.0,
"eval_f1": 0.7842117575951872,
"eval_loss": 1.8620420694351196,
"eval_runtime": 1.8603,
"eval_samples_per_second": 311.785,
"eval_steps_per_second": 1.075,
"step": 5126
},
{
"epoch": 22.1,
"learning_rate": 5.8496264504848195e-06,
"loss": 0.0007,
"step": 5150
},
{
"epoch": 22.32,
"learning_rate": 5.690669209982516e-06,
"loss": 0.0,
"step": 5200
},
{
"epoch": 22.53,
"learning_rate": 5.5317119694802105e-06,
"loss": 0.0021,
"step": 5250
},
{
"epoch": 22.75,
"learning_rate": 5.372754728977906e-06,
"loss": 0.0077,
"step": 5300
},
{
"epoch": 22.96,
"learning_rate": 5.213797488475601e-06,
"loss": 0.0002,
"step": 5350
},
{
"epoch": 23.0,
"eval_f1": 0.7828476594276503,
"eval_loss": 1.919188141822815,
"eval_runtime": 1.4859,
"eval_samples_per_second": 390.344,
"eval_steps_per_second": 1.346,
"step": 5359
},
{
"epoch": 23.18,
"learning_rate": 5.054840247973296e-06,
"loss": 0.0194,
"step": 5400
},
{
"epoch": 23.39,
"learning_rate": 4.895883007470991e-06,
"loss": 0.0132,
"step": 5450
},
{
"epoch": 23.61,
"learning_rate": 4.7369257669686855e-06,
"loss": 0.0001,
"step": 5500
},
{
"epoch": 23.82,
"learning_rate": 4.577968526466381e-06,
"loss": 0.0,
"step": 5550
},
{
"epoch": 24.0,
"eval_f1": 0.7927310235612234,
"eval_loss": 1.9081404209136963,
"eval_runtime": 1.4831,
"eval_samples_per_second": 391.082,
"eval_steps_per_second": 1.349,
"step": 5592
},
{
"epoch": 24.03,
"learning_rate": 4.419011285964076e-06,
"loss": 0.0,
"step": 5600
},
{
"epoch": 24.25,
"learning_rate": 4.260054045461771e-06,
"loss": 0.0001,
"step": 5650
},
{
"epoch": 24.46,
"learning_rate": 4.101096804959467e-06,
"loss": 0.0122,
"step": 5700
},
{
"epoch": 24.68,
"learning_rate": 3.942139564457161e-06,
"loss": 0.0,
"step": 5750
},
{
"epoch": 24.89,
"learning_rate": 3.7831823239548564e-06,
"loss": 0.0003,
"step": 5800
},
{
"epoch": 25.0,
"eval_f1": 0.7812550199347442,
"eval_loss": 1.9822450876235962,
"eval_runtime": 1.5174,
"eval_samples_per_second": 382.228,
"eval_steps_per_second": 1.318,
"step": 5825
},
{
"epoch": 25.11,
"learning_rate": 3.6242250834525515e-06,
"loss": 0.0116,
"step": 5850
},
{
"epoch": 25.32,
"learning_rate": 3.4652678429502466e-06,
"loss": 0.0,
"step": 5900
},
{
"epoch": 25.54,
"learning_rate": 3.306310602447942e-06,
"loss": 0.0004,
"step": 5950
},
{
"epoch": 25.75,
"learning_rate": 3.147353361945637e-06,
"loss": 0.0027,
"step": 6000
},
{
"epoch": 25.97,
"learning_rate": 2.9883961214433322e-06,
"loss": 0.0059,
"step": 6050
},
{
"epoch": 26.0,
"eval_f1": 0.7953953204096383,
"eval_loss": 1.8736791610717773,
"eval_runtime": 1.4646,
"eval_samples_per_second": 396.003,
"eval_steps_per_second": 1.366,
"step": 6058
},
{
"epoch": 26.18,
"learning_rate": 2.8294388809410273e-06,
"loss": 0.0001,
"step": 6100
},
{
"epoch": 26.39,
"learning_rate": 2.6704816404387224e-06,
"loss": 0.0,
"step": 6150
},
{
"epoch": 26.61,
"learning_rate": 2.5115243999364175e-06,
"loss": 0.0,
"step": 6200
},
{
"epoch": 26.82,
"learning_rate": 2.3525671594341126e-06,
"loss": 0.0,
"step": 6250
},
{
"epoch": 27.0,
"eval_f1": 0.7929217495075929,
"eval_loss": 1.879309892654419,
"eval_runtime": 1.9514,
"eval_samples_per_second": 297.228,
"eval_steps_per_second": 1.025,
"step": 6291
},
{
"epoch": 27.04,
"learning_rate": 2.1936099189318076e-06,
"loss": 0.0,
"step": 6300
},
{
"epoch": 27.25,
"learning_rate": 2.0346526784295027e-06,
"loss": 0.0,
"step": 6350
},
{
"epoch": 27.47,
"learning_rate": 1.8756954379271978e-06,
"loss": 0.0111,
"step": 6400
},
{
"epoch": 27.68,
"learning_rate": 1.7167381974248929e-06,
"loss": 0.0,
"step": 6450
},
{
"epoch": 27.9,
"learning_rate": 1.557780956922588e-06,
"loss": 0.0,
"step": 6500
},
{
"epoch": 28.0,
"eval_f1": 0.794029634093503,
"eval_loss": 1.8904625177383423,
"eval_runtime": 2.2478,
"eval_samples_per_second": 258.035,
"eval_steps_per_second": 0.89,
"step": 6524
},
{
"epoch": 28.11,
"learning_rate": 1.398823716420283e-06,
"loss": 0.0,
"step": 6550
},
{
"epoch": 28.33,
"learning_rate": 1.2398664759179781e-06,
"loss": 0.0,
"step": 6600
},
{
"epoch": 28.54,
"learning_rate": 1.0809092354156734e-06,
"loss": 0.0,
"step": 6650
},
{
"epoch": 28.76,
"learning_rate": 9.219519949133683e-07,
"loss": 0.0,
"step": 6700
},
{
"epoch": 28.97,
"learning_rate": 7.629947544110635e-07,
"loss": 0.0,
"step": 6750
},
{
"epoch": 29.0,
"eval_f1": 0.794029634093503,
"eval_loss": 1.8970826864242554,
"eval_runtime": 1.9492,
"eval_samples_per_second": 297.551,
"eval_steps_per_second": 1.026,
"step": 6757
},
{
"epoch": 29.18,
"learning_rate": 6.040375139087585e-07,
"loss": 0.0,
"step": 6800
},
{
"epoch": 29.4,
"learning_rate": 4.450802734064537e-07,
"loss": 0.0,
"step": 6850
},
{
"epoch": 29.61,
"learning_rate": 2.861230329041488e-07,
"loss": 0.0,
"step": 6900
},
{
"epoch": 29.83,
"learning_rate": 1.2716579240184392e-07,
"loss": 0.0002,
"step": 6950
},
{
"epoch": 30.0,
"eval_f1": 0.7954091951908298,
"eval_loss": 1.9001948833465576,
"eval_runtime": 1.8428,
"eval_samples_per_second": 314.746,
"eval_steps_per_second": 1.085,
"step": 6990
},
{
"epoch": 30.0,
"step": 6990,
"total_flos": 5566168764425088.0,
"train_loss": 0.16078996370909257,
"train_runtime": 2045.6711,
"train_samples_per_second": 54.496,
"train_steps_per_second": 3.417
}
],
"logging_steps": 50,
"max_steps": 6990,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 5566168764425088.0,
"trial_name": null,
"trial_params": null
}