hos_sentiment_bert / trainer_state.json
hts98's picture
End of training
01c2017 verified
{
"best_metric": 0.9326343966094134,
"best_model_checkpoint": "/tmp/classification_hos_bert/checkpoint-662",
"epoch": 40.0,
"eval_steps": 500,
"global_step": 13240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_f1": 0.9258309167967879,
"eval_loss": 0.21829327940940857,
"eval_runtime": 25.1238,
"eval_samples_per_second": 356.873,
"eval_steps_per_second": 5.612,
"step": 331
},
{
"epoch": 1.510574018126888,
"grad_norm": 2.2944066524505615,
"learning_rate": 2.8867069486404837e-05,
"loss": 0.239,
"step": 500
},
{
"epoch": 2.0,
"eval_f1": 0.9326343966094134,
"eval_loss": 0.21681125462055206,
"eval_runtime": 25.1665,
"eval_samples_per_second": 356.267,
"eval_steps_per_second": 5.603,
"step": 662
},
{
"epoch": 3.0,
"eval_f1": 0.9281730983716261,
"eval_loss": 0.25273850560188293,
"eval_runtime": 25.1148,
"eval_samples_per_second": 357.001,
"eval_steps_per_second": 5.614,
"step": 993
},
{
"epoch": 3.0211480362537766,
"grad_norm": 0.6900779604911804,
"learning_rate": 2.7734138972809666e-05,
"loss": 0.1255,
"step": 1000
},
{
"epoch": 4.0,
"eval_f1": 0.9288422931072943,
"eval_loss": 0.28959983587265015,
"eval_runtime": 25.1245,
"eval_samples_per_second": 356.863,
"eval_steps_per_second": 5.612,
"step": 1324
},
{
"epoch": 4.531722054380665,
"grad_norm": 1.7124501466751099,
"learning_rate": 2.66012084592145e-05,
"loss": 0.0662,
"step": 1500
},
{
"epoch": 5.0,
"eval_f1": 0.9266116439884007,
"eval_loss": 0.33890488743782043,
"eval_runtime": 25.1434,
"eval_samples_per_second": 356.595,
"eval_steps_per_second": 5.608,
"step": 1655
},
{
"epoch": 6.0,
"eval_f1": 0.9293999553870176,
"eval_loss": 0.3792820870876312,
"eval_runtime": 25.1391,
"eval_samples_per_second": 356.656,
"eval_steps_per_second": 5.609,
"step": 1986
},
{
"epoch": 6.042296072507553,
"grad_norm": 0.8430729508399963,
"learning_rate": 2.5468277945619337e-05,
"loss": 0.0453,
"step": 2000
},
{
"epoch": 7.0,
"eval_f1": 0.9251617220611198,
"eval_loss": 0.41103312373161316,
"eval_runtime": 25.1418,
"eval_samples_per_second": 356.617,
"eval_steps_per_second": 5.608,
"step": 2317
},
{
"epoch": 7.552870090634441,
"grad_norm": 0.8941565752029419,
"learning_rate": 2.433534743202417e-05,
"loss": 0.0257,
"step": 2500
},
{
"epoch": 8.0,
"eval_f1": 0.9204773589114432,
"eval_loss": 0.4656200110912323,
"eval_runtime": 25.1662,
"eval_samples_per_second": 356.271,
"eval_steps_per_second": 5.603,
"step": 2648
},
{
"epoch": 9.0,
"eval_f1": 0.9262770466205665,
"eval_loss": 0.49531668424606323,
"eval_runtime": 25.1846,
"eval_samples_per_second": 356.011,
"eval_steps_per_second": 5.599,
"step": 2979
},
{
"epoch": 9.06344410876133,
"grad_norm": 0.01369735598564148,
"learning_rate": 2.3202416918429002e-05,
"loss": 0.0196,
"step": 3000
},
{
"epoch": 10.0,
"eval_f1": 0.9265001115324559,
"eval_loss": 0.5412325263023376,
"eval_runtime": 25.1393,
"eval_samples_per_second": 356.653,
"eval_steps_per_second": 5.609,
"step": 3310
},
{
"epoch": 10.574018126888218,
"grad_norm": 1.39247727394104,
"learning_rate": 2.2069486404833838e-05,
"loss": 0.0125,
"step": 3500
},
{
"epoch": 11.0,
"eval_f1": 0.9244925273254517,
"eval_loss": 0.5528218150138855,
"eval_runtime": 25.112,
"eval_samples_per_second": 357.04,
"eval_steps_per_second": 5.615,
"step": 3641
},
{
"epoch": 12.0,
"eval_f1": 0.9261655141646219,
"eval_loss": 0.5526648759841919,
"eval_runtime": 25.1453,
"eval_samples_per_second": 356.568,
"eval_steps_per_second": 5.607,
"step": 3972
},
{
"epoch": 12.084592145015106,
"grad_norm": 0.3373314440250397,
"learning_rate": 2.093655589123867e-05,
"loss": 0.0141,
"step": 4000
},
{
"epoch": 13.0,
"eval_f1": 0.9276154360919028,
"eval_loss": 0.5682665705680847,
"eval_runtime": 25.1511,
"eval_samples_per_second": 356.486,
"eval_steps_per_second": 5.606,
"step": 4303
},
{
"epoch": 13.595166163141993,
"grad_norm": 0.07623090595006943,
"learning_rate": 1.9803625377643507e-05,
"loss": 0.0097,
"step": 4500
},
{
"epoch": 14.0,
"eval_f1": 0.9239348650457283,
"eval_loss": 0.5835373997688293,
"eval_runtime": 25.143,
"eval_samples_per_second": 356.6,
"eval_steps_per_second": 5.608,
"step": 4634
},
{
"epoch": 15.0,
"eval_f1": 0.9279500334597368,
"eval_loss": 0.5905042886734009,
"eval_runtime": 25.1447,
"eval_samples_per_second": 356.576,
"eval_steps_per_second": 5.608,
"step": 4965
},
{
"epoch": 15.105740181268882,
"grad_norm": 0.01646752655506134,
"learning_rate": 1.867069486404834e-05,
"loss": 0.0107,
"step": 5000
},
{
"epoch": 16.0,
"eval_f1": 0.9298460852107964,
"eval_loss": 0.5799357295036316,
"eval_runtime": 25.142,
"eval_samples_per_second": 356.615,
"eval_steps_per_second": 5.608,
"step": 5296
},
{
"epoch": 16.61631419939577,
"grad_norm": 0.061389509588479996,
"learning_rate": 1.753776435045317e-05,
"loss": 0.009,
"step": 5500
},
{
"epoch": 17.0,
"eval_f1": 0.9266116439884007,
"eval_loss": 0.6126909255981445,
"eval_runtime": 25.1653,
"eval_samples_per_second": 356.285,
"eval_steps_per_second": 5.603,
"step": 5627
},
{
"epoch": 18.0,
"eval_f1": 0.9283961632835155,
"eval_loss": 0.591077446937561,
"eval_runtime": 25.1503,
"eval_samples_per_second": 356.497,
"eval_steps_per_second": 5.606,
"step": 5958
},
{
"epoch": 18.12688821752266,
"grad_norm": 0.0015490599907934666,
"learning_rate": 1.6404833836858007e-05,
"loss": 0.0084,
"step": 6000
},
{
"epoch": 19.0,
"eval_f1": 0.930292215034575,
"eval_loss": 0.5900245308876038,
"eval_runtime": 25.1515,
"eval_samples_per_second": 356.479,
"eval_steps_per_second": 5.606,
"step": 6289
},
{
"epoch": 19.637462235649547,
"grad_norm": 0.34078794717788696,
"learning_rate": 1.527190332326284e-05,
"loss": 0.008,
"step": 6500
},
{
"epoch": 20.0,
"eval_f1": 0.9282846308275708,
"eval_loss": 0.5922934412956238,
"eval_runtime": 25.1544,
"eval_samples_per_second": 356.438,
"eval_steps_per_second": 5.605,
"step": 6620
},
{
"epoch": 21.0,
"eval_f1": 0.9305152799464644,
"eval_loss": 0.6186188459396362,
"eval_runtime": 25.1563,
"eval_samples_per_second": 356.412,
"eval_steps_per_second": 5.605,
"step": 6951
},
{
"epoch": 21.148036253776436,
"grad_norm": 0.16627806425094604,
"learning_rate": 1.4138972809667674e-05,
"loss": 0.0068,
"step": 7000
},
{
"epoch": 22.0,
"eval_f1": 0.9291768904751282,
"eval_loss": 0.6076038479804993,
"eval_runtime": 25.1577,
"eval_samples_per_second": 356.392,
"eval_steps_per_second": 5.605,
"step": 7282
},
{
"epoch": 22.658610271903324,
"grad_norm": 0.02961309626698494,
"learning_rate": 1.3006042296072508e-05,
"loss": 0.0064,
"step": 7500
},
{
"epoch": 23.0,
"eval_f1": 0.930292215034575,
"eval_loss": 0.578154444694519,
"eval_runtime": 25.1751,
"eval_samples_per_second": 356.145,
"eval_steps_per_second": 5.601,
"step": 7613
},
{
"epoch": 24.0,
"eval_f1": 0.9319652018737452,
"eval_loss": 0.607693076133728,
"eval_runtime": 25.1532,
"eval_samples_per_second": 356.455,
"eval_steps_per_second": 5.606,
"step": 7944
},
{
"epoch": 24.169184290030213,
"grad_norm": 0.012147185392677784,
"learning_rate": 1.187311178247734e-05,
"loss": 0.0048,
"step": 8000
},
{
"epoch": 25.0,
"eval_f1": 0.9281730983716261,
"eval_loss": 0.6445909738540649,
"eval_runtime": 25.1606,
"eval_samples_per_second": 356.351,
"eval_steps_per_second": 5.604,
"step": 8275
},
{
"epoch": 25.6797583081571,
"grad_norm": 0.0304458886384964,
"learning_rate": 1.0740181268882177e-05,
"loss": 0.0046,
"step": 8500
},
{
"epoch": 26.0,
"eval_f1": 0.9315190720499665,
"eval_loss": 0.6416810154914856,
"eval_runtime": 25.1644,
"eval_samples_per_second": 356.298,
"eval_steps_per_second": 5.603,
"step": 8606
},
{
"epoch": 27.0,
"eval_f1": 0.9282846308275708,
"eval_loss": 0.6655632257461548,
"eval_runtime": 25.1733,
"eval_samples_per_second": 356.171,
"eval_steps_per_second": 5.601,
"step": 8937
},
{
"epoch": 27.190332326283986,
"grad_norm": 0.0014442217070609331,
"learning_rate": 9.60725075528701e-06,
"loss": 0.0053,
"step": 9000
},
{
"epoch": 28.0,
"eval_f1": 0.9288422931072943,
"eval_loss": 0.6541187763214111,
"eval_runtime": 25.1766,
"eval_samples_per_second": 356.124,
"eval_steps_per_second": 5.6,
"step": 9268
},
{
"epoch": 28.700906344410875,
"grad_norm": 0.0008725296938791871,
"learning_rate": 8.474320241691843e-06,
"loss": 0.0043,
"step": 9500
},
{
"epoch": 29.0,
"eval_f1": 0.9277269685478474,
"eval_loss": 0.6702625155448914,
"eval_runtime": 25.1263,
"eval_samples_per_second": 356.837,
"eval_steps_per_second": 5.612,
"step": 9599
},
{
"epoch": 30.0,
"eval_f1": 0.9251617220611198,
"eval_loss": 0.6871447563171387,
"eval_runtime": 25.1371,
"eval_samples_per_second": 356.684,
"eval_steps_per_second": 5.609,
"step": 9930
},
{
"epoch": 30.211480362537763,
"grad_norm": 0.0005139079876244068,
"learning_rate": 7.341389728096677e-06,
"loss": 0.0041,
"step": 10000
},
{
"epoch": 31.0,
"eval_f1": 0.9286192281954049,
"eval_loss": 0.6735148429870605,
"eval_runtime": 25.1585,
"eval_samples_per_second": 356.38,
"eval_steps_per_second": 5.604,
"step": 10261
},
{
"epoch": 31.72205438066465,
"grad_norm": 0.002090197755023837,
"learning_rate": 6.208459214501511e-06,
"loss": 0.0034,
"step": 10500
},
{
"epoch": 32.0,
"eval_f1": 0.9306268124024091,
"eval_loss": 0.6650559306144714,
"eval_runtime": 25.1466,
"eval_samples_per_second": 356.549,
"eval_steps_per_second": 5.607,
"step": 10592
},
{
"epoch": 33.0,
"eval_f1": 0.9305152799464644,
"eval_loss": 0.6799349188804626,
"eval_runtime": 25.1547,
"eval_samples_per_second": 356.435,
"eval_steps_per_second": 5.605,
"step": 10923
},
{
"epoch": 33.23262839879154,
"grad_norm": 0.016955886036157608,
"learning_rate": 5.075528700906345e-06,
"loss": 0.0032,
"step": 11000
},
{
"epoch": 34.0,
"eval_f1": 0.9297345527548516,
"eval_loss": 0.6752559542655945,
"eval_runtime": 25.1385,
"eval_samples_per_second": 356.664,
"eval_steps_per_second": 5.609,
"step": 11254
},
{
"epoch": 34.74320241691843,
"grad_norm": 0.0011760705383494496,
"learning_rate": 3.942598187311178e-06,
"loss": 0.0031,
"step": 11500
},
{
"epoch": 35.0,
"eval_f1": 0.9309614097702431,
"eval_loss": 0.6854746341705322,
"eval_runtime": 25.1626,
"eval_samples_per_second": 356.323,
"eval_steps_per_second": 5.604,
"step": 11585
},
{
"epoch": 36.0,
"eval_f1": 0.9306268124024091,
"eval_loss": 0.6885010600090027,
"eval_runtime": 25.1518,
"eval_samples_per_second": 356.475,
"eval_steps_per_second": 5.606,
"step": 11916
},
{
"epoch": 36.25377643504532,
"grad_norm": 0.000429723208071664,
"learning_rate": 2.809667673716012e-06,
"loss": 0.003,
"step": 12000
},
{
"epoch": 37.0,
"eval_f1": 0.9292884229310729,
"eval_loss": 0.6960038542747498,
"eval_runtime": 25.1589,
"eval_samples_per_second": 356.375,
"eval_steps_per_second": 5.604,
"step": 12247
},
{
"epoch": 37.764350453172206,
"grad_norm": 0.0003884187317453325,
"learning_rate": 1.6767371601208459e-06,
"loss": 0.0026,
"step": 12500
},
{
"epoch": 38.0,
"eval_f1": 0.9291768904751282,
"eval_loss": 0.6950347423553467,
"eval_runtime": 25.1564,
"eval_samples_per_second": 356.41,
"eval_steps_per_second": 5.605,
"step": 12578
},
{
"epoch": 39.0,
"eval_f1": 0.9297345527548516,
"eval_loss": 0.6964432597160339,
"eval_runtime": 25.1685,
"eval_samples_per_second": 356.238,
"eval_steps_per_second": 5.602,
"step": 12909
},
{
"epoch": 39.274924471299094,
"grad_norm": 0.03609294071793556,
"learning_rate": 5.438066465256798e-07,
"loss": 0.0033,
"step": 13000
},
{
"epoch": 40.0,
"eval_f1": 0.928953825563239,
"eval_loss": 0.6954053640365601,
"eval_runtime": 25.1629,
"eval_samples_per_second": 356.318,
"eval_steps_per_second": 5.603,
"step": 13240
},
{
"epoch": 40.0,
"step": 13240,
"total_flos": 5.5714266203904e+16,
"train_loss": 0.024559360085297206,
"train_runtime": 8074.1532,
"train_samples_per_second": 104.903,
"train_steps_per_second": 1.64
}
],
"logging_steps": 500,
"max_steps": 13240,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.5714266203904e+16,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}