{ "best_metric": 0.9326343966094134, "best_model_checkpoint": "/tmp/classification_hos_bert/checkpoint-662", "epoch": 40.0, "eval_steps": 500, "global_step": 13240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_f1": 0.9258309167967879, "eval_loss": 0.21829327940940857, "eval_runtime": 25.1238, "eval_samples_per_second": 356.873, "eval_steps_per_second": 5.612, "step": 331 }, { "epoch": 1.510574018126888, "grad_norm": 2.2944066524505615, "learning_rate": 2.8867069486404837e-05, "loss": 0.239, "step": 500 }, { "epoch": 2.0, "eval_f1": 0.9326343966094134, "eval_loss": 0.21681125462055206, "eval_runtime": 25.1665, "eval_samples_per_second": 356.267, "eval_steps_per_second": 5.603, "step": 662 }, { "epoch": 3.0, "eval_f1": 0.9281730983716261, "eval_loss": 0.25273850560188293, "eval_runtime": 25.1148, "eval_samples_per_second": 357.001, "eval_steps_per_second": 5.614, "step": 993 }, { "epoch": 3.0211480362537766, "grad_norm": 0.6900779604911804, "learning_rate": 2.7734138972809666e-05, "loss": 0.1255, "step": 1000 }, { "epoch": 4.0, "eval_f1": 0.9288422931072943, "eval_loss": 0.28959983587265015, "eval_runtime": 25.1245, "eval_samples_per_second": 356.863, "eval_steps_per_second": 5.612, "step": 1324 }, { "epoch": 4.531722054380665, "grad_norm": 1.7124501466751099, "learning_rate": 2.66012084592145e-05, "loss": 0.0662, "step": 1500 }, { "epoch": 5.0, "eval_f1": 0.9266116439884007, "eval_loss": 0.33890488743782043, "eval_runtime": 25.1434, "eval_samples_per_second": 356.595, "eval_steps_per_second": 5.608, "step": 1655 }, { "epoch": 6.0, "eval_f1": 0.9293999553870176, "eval_loss": 0.3792820870876312, "eval_runtime": 25.1391, "eval_samples_per_second": 356.656, "eval_steps_per_second": 5.609, "step": 1986 }, { "epoch": 6.042296072507553, "grad_norm": 0.8430729508399963, "learning_rate": 2.5468277945619337e-05, "loss": 0.0453, "step": 2000 }, { "epoch": 7.0, "eval_f1": 0.9251617220611198, "eval_loss": 0.41103312373161316, "eval_runtime": 25.1418, "eval_samples_per_second": 356.617, "eval_steps_per_second": 5.608, "step": 2317 }, { "epoch": 7.552870090634441, "grad_norm": 0.8941565752029419, "learning_rate": 2.433534743202417e-05, "loss": 0.0257, "step": 2500 }, { "epoch": 8.0, "eval_f1": 0.9204773589114432, "eval_loss": 0.4656200110912323, "eval_runtime": 25.1662, "eval_samples_per_second": 356.271, "eval_steps_per_second": 5.603, "step": 2648 }, { "epoch": 9.0, "eval_f1": 0.9262770466205665, "eval_loss": 0.49531668424606323, "eval_runtime": 25.1846, "eval_samples_per_second": 356.011, "eval_steps_per_second": 5.599, "step": 2979 }, { "epoch": 9.06344410876133, "grad_norm": 0.01369735598564148, "learning_rate": 2.3202416918429002e-05, "loss": 0.0196, "step": 3000 }, { "epoch": 10.0, "eval_f1": 0.9265001115324559, "eval_loss": 0.5412325263023376, "eval_runtime": 25.1393, "eval_samples_per_second": 356.653, "eval_steps_per_second": 5.609, "step": 3310 }, { "epoch": 10.574018126888218, "grad_norm": 1.39247727394104, "learning_rate": 2.2069486404833838e-05, "loss": 0.0125, "step": 3500 }, { "epoch": 11.0, "eval_f1": 0.9244925273254517, "eval_loss": 0.5528218150138855, "eval_runtime": 25.112, "eval_samples_per_second": 357.04, "eval_steps_per_second": 5.615, "step": 3641 }, { "epoch": 12.0, "eval_f1": 0.9261655141646219, "eval_loss": 0.5526648759841919, "eval_runtime": 25.1453, "eval_samples_per_second": 356.568, "eval_steps_per_second": 5.607, "step": 3972 }, { "epoch": 12.084592145015106, "grad_norm": 0.3373314440250397, "learning_rate": 2.093655589123867e-05, "loss": 0.0141, "step": 4000 }, { "epoch": 13.0, "eval_f1": 0.9276154360919028, "eval_loss": 0.5682665705680847, "eval_runtime": 25.1511, "eval_samples_per_second": 356.486, "eval_steps_per_second": 5.606, "step": 4303 }, { "epoch": 13.595166163141993, "grad_norm": 0.07623090595006943, "learning_rate": 1.9803625377643507e-05, "loss": 0.0097, "step": 4500 }, { "epoch": 14.0, "eval_f1": 0.9239348650457283, "eval_loss": 0.5835373997688293, "eval_runtime": 25.143, "eval_samples_per_second": 356.6, "eval_steps_per_second": 5.608, "step": 4634 }, { "epoch": 15.0, "eval_f1": 0.9279500334597368, "eval_loss": 0.5905042886734009, "eval_runtime": 25.1447, "eval_samples_per_second": 356.576, "eval_steps_per_second": 5.608, "step": 4965 }, { "epoch": 15.105740181268882, "grad_norm": 0.01646752655506134, "learning_rate": 1.867069486404834e-05, "loss": 0.0107, "step": 5000 }, { "epoch": 16.0, "eval_f1": 0.9298460852107964, "eval_loss": 0.5799357295036316, "eval_runtime": 25.142, "eval_samples_per_second": 356.615, "eval_steps_per_second": 5.608, "step": 5296 }, { "epoch": 16.61631419939577, "grad_norm": 0.061389509588479996, "learning_rate": 1.753776435045317e-05, "loss": 0.009, "step": 5500 }, { "epoch": 17.0, "eval_f1": 0.9266116439884007, "eval_loss": 0.6126909255981445, "eval_runtime": 25.1653, "eval_samples_per_second": 356.285, "eval_steps_per_second": 5.603, "step": 5627 }, { "epoch": 18.0, "eval_f1": 0.9283961632835155, "eval_loss": 0.591077446937561, "eval_runtime": 25.1503, "eval_samples_per_second": 356.497, "eval_steps_per_second": 5.606, "step": 5958 }, { "epoch": 18.12688821752266, "grad_norm": 0.0015490599907934666, "learning_rate": 1.6404833836858007e-05, "loss": 0.0084, "step": 6000 }, { "epoch": 19.0, "eval_f1": 0.930292215034575, "eval_loss": 0.5900245308876038, "eval_runtime": 25.1515, "eval_samples_per_second": 356.479, "eval_steps_per_second": 5.606, "step": 6289 }, { "epoch": 19.637462235649547, "grad_norm": 0.34078794717788696, "learning_rate": 1.527190332326284e-05, "loss": 0.008, "step": 6500 }, { "epoch": 20.0, "eval_f1": 0.9282846308275708, "eval_loss": 0.5922934412956238, "eval_runtime": 25.1544, "eval_samples_per_second": 356.438, "eval_steps_per_second": 5.605, "step": 6620 }, { "epoch": 21.0, "eval_f1": 0.9305152799464644, "eval_loss": 0.6186188459396362, "eval_runtime": 25.1563, "eval_samples_per_second": 356.412, "eval_steps_per_second": 5.605, "step": 6951 }, { "epoch": 21.148036253776436, "grad_norm": 0.16627806425094604, "learning_rate": 1.4138972809667674e-05, "loss": 0.0068, "step": 7000 }, { "epoch": 22.0, "eval_f1": 0.9291768904751282, "eval_loss": 0.6076038479804993, "eval_runtime": 25.1577, "eval_samples_per_second": 356.392, "eval_steps_per_second": 5.605, "step": 7282 }, { "epoch": 22.658610271903324, "grad_norm": 0.02961309626698494, "learning_rate": 1.3006042296072508e-05, "loss": 0.0064, "step": 7500 }, { "epoch": 23.0, "eval_f1": 0.930292215034575, "eval_loss": 0.578154444694519, "eval_runtime": 25.1751, "eval_samples_per_second": 356.145, "eval_steps_per_second": 5.601, "step": 7613 }, { "epoch": 24.0, "eval_f1": 0.9319652018737452, "eval_loss": 0.607693076133728, "eval_runtime": 25.1532, "eval_samples_per_second": 356.455, "eval_steps_per_second": 5.606, "step": 7944 }, { "epoch": 24.169184290030213, "grad_norm": 0.012147185392677784, "learning_rate": 1.187311178247734e-05, "loss": 0.0048, "step": 8000 }, { "epoch": 25.0, "eval_f1": 0.9281730983716261, "eval_loss": 0.6445909738540649, "eval_runtime": 25.1606, "eval_samples_per_second": 356.351, "eval_steps_per_second": 5.604, "step": 8275 }, { "epoch": 25.6797583081571, "grad_norm": 0.0304458886384964, "learning_rate": 1.0740181268882177e-05, "loss": 0.0046, "step": 8500 }, { "epoch": 26.0, "eval_f1": 0.9315190720499665, "eval_loss": 0.6416810154914856, "eval_runtime": 25.1644, "eval_samples_per_second": 356.298, "eval_steps_per_second": 5.603, "step": 8606 }, { "epoch": 27.0, "eval_f1": 0.9282846308275708, "eval_loss": 0.6655632257461548, "eval_runtime": 25.1733, "eval_samples_per_second": 356.171, "eval_steps_per_second": 5.601, "step": 8937 }, { "epoch": 27.190332326283986, "grad_norm": 0.0014442217070609331, "learning_rate": 9.60725075528701e-06, "loss": 0.0053, "step": 9000 }, { "epoch": 28.0, "eval_f1": 0.9288422931072943, "eval_loss": 0.6541187763214111, "eval_runtime": 25.1766, "eval_samples_per_second": 356.124, "eval_steps_per_second": 5.6, "step": 9268 }, { "epoch": 28.700906344410875, "grad_norm": 0.0008725296938791871, "learning_rate": 8.474320241691843e-06, "loss": 0.0043, "step": 9500 }, { "epoch": 29.0, "eval_f1": 0.9277269685478474, "eval_loss": 0.6702625155448914, "eval_runtime": 25.1263, "eval_samples_per_second": 356.837, "eval_steps_per_second": 5.612, "step": 9599 }, { "epoch": 30.0, "eval_f1": 0.9251617220611198, "eval_loss": 0.6871447563171387, "eval_runtime": 25.1371, "eval_samples_per_second": 356.684, "eval_steps_per_second": 5.609, "step": 9930 }, { "epoch": 30.211480362537763, "grad_norm": 0.0005139079876244068, "learning_rate": 7.341389728096677e-06, "loss": 0.0041, "step": 10000 }, { "epoch": 31.0, "eval_f1": 0.9286192281954049, "eval_loss": 0.6735148429870605, "eval_runtime": 25.1585, "eval_samples_per_second": 356.38, "eval_steps_per_second": 5.604, "step": 10261 }, { "epoch": 31.72205438066465, "grad_norm": 0.002090197755023837, "learning_rate": 6.208459214501511e-06, "loss": 0.0034, "step": 10500 }, { "epoch": 32.0, "eval_f1": 0.9306268124024091, "eval_loss": 0.6650559306144714, "eval_runtime": 25.1466, "eval_samples_per_second": 356.549, "eval_steps_per_second": 5.607, "step": 10592 }, { "epoch": 33.0, "eval_f1": 0.9305152799464644, "eval_loss": 0.6799349188804626, "eval_runtime": 25.1547, "eval_samples_per_second": 356.435, "eval_steps_per_second": 5.605, "step": 10923 }, { "epoch": 33.23262839879154, "grad_norm": 0.016955886036157608, "learning_rate": 5.075528700906345e-06, "loss": 0.0032, "step": 11000 }, { "epoch": 34.0, "eval_f1": 0.9297345527548516, "eval_loss": 0.6752559542655945, "eval_runtime": 25.1385, "eval_samples_per_second": 356.664, "eval_steps_per_second": 5.609, "step": 11254 }, { "epoch": 34.74320241691843, "grad_norm": 0.0011760705383494496, "learning_rate": 3.942598187311178e-06, "loss": 0.0031, "step": 11500 }, { "epoch": 35.0, "eval_f1": 0.9309614097702431, "eval_loss": 0.6854746341705322, "eval_runtime": 25.1626, "eval_samples_per_second": 356.323, "eval_steps_per_second": 5.604, "step": 11585 }, { "epoch": 36.0, "eval_f1": 0.9306268124024091, "eval_loss": 0.6885010600090027, "eval_runtime": 25.1518, "eval_samples_per_second": 356.475, "eval_steps_per_second": 5.606, "step": 11916 }, { "epoch": 36.25377643504532, "grad_norm": 0.000429723208071664, "learning_rate": 2.809667673716012e-06, "loss": 0.003, "step": 12000 }, { "epoch": 37.0, "eval_f1": 0.9292884229310729, "eval_loss": 0.6960038542747498, "eval_runtime": 25.1589, "eval_samples_per_second": 356.375, "eval_steps_per_second": 5.604, "step": 12247 }, { "epoch": 37.764350453172206, "grad_norm": 0.0003884187317453325, "learning_rate": 1.6767371601208459e-06, "loss": 0.0026, "step": 12500 }, { "epoch": 38.0, "eval_f1": 0.9291768904751282, "eval_loss": 0.6950347423553467, "eval_runtime": 25.1564, "eval_samples_per_second": 356.41, "eval_steps_per_second": 5.605, "step": 12578 }, { "epoch": 39.0, "eval_f1": 0.9297345527548516, "eval_loss": 0.6964432597160339, "eval_runtime": 25.1685, "eval_samples_per_second": 356.238, "eval_steps_per_second": 5.602, "step": 12909 }, { "epoch": 39.274924471299094, "grad_norm": 0.03609294071793556, "learning_rate": 5.438066465256798e-07, "loss": 0.0033, "step": 13000 }, { "epoch": 40.0, "eval_f1": 0.928953825563239, "eval_loss": 0.6954053640365601, "eval_runtime": 25.1629, "eval_samples_per_second": 356.318, "eval_steps_per_second": 5.603, "step": 13240 }, { "epoch": 40.0, "step": 13240, "total_flos": 5.5714266203904e+16, "train_loss": 0.024559360085297206, "train_runtime": 8074.1532, "train_samples_per_second": 104.903, "train_steps_per_second": 1.64 } ], "logging_steps": 500, "max_steps": 13240, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.5714266203904e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }