{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 648, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.17153996101364524, "precision": 0.25287356321839083, "recall": 0.12979351032448377, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.08383233532934131, "precision": 1.0, "recall": 0.04375, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8469620831429876, "precision": 0.7427884615384616, "recall": 0.9851222104144527, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.5420081967213116, "precision": 0.6803858520900321, "recall": 0.4504044274159217, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8343351767251743, "precision": 0.8141717503519474, "recall": 0.8555226824457594, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9004323458767015, "precision": 0.8590809093131264, "recall": 0.9459659511472983, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988389078242631, "precision": 0.9991008541885209, "recall": 0.9985770987793006, "support": 13353.0 }, "eval_accuracy": 0.8856789444749863, "eval_loss": 0.28347474336624146, "eval_macro avg": { "f1-score": 0.6254212866619178, "precision": 0.76405734152864, "recall": 0.629876554361031, "support": 36380.0 }, "eval_runtime": 5.0448, "eval_samples_per_second": 16.056, "eval_steps_per_second": 2.18, "eval_weighted avg": { "f1-score": 0.8748148382495958, "precision": 0.8768575102351055, "recall": 0.8856789444749863, "support": 36380.0 }, "step": 81 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.5137931034482759, "precision": 0.6182572614107884, "recall": 0.43952802359882004, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.7622641509433963, "precision": 0.9619047619047619, "recall": 0.63125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8853910477127397, "precision": 0.8241758241758241, "recall": 0.9564293304994687, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.5413588470388762, "precision": 0.6279853891542568, "recall": 0.47573435504469985, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.7949260042283298, "precision": 0.856492027334852, "recall": 0.7416173570019724, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9007064868336545, "precision": 0.8614779777627618, "recall": 0.9436780835744566, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988389947942025, "precision": 0.9990260713215463, "recall": 0.9986519883172321, "support": 13353.0 }, "eval_accuracy": 0.8864211105002748, "eval_loss": 0.2809496521949768, "eval_macro avg": { "f1-score": 0.7710398049999251, "precision": 0.8213313304378275, "recall": 0.7409841625766643, "support": 36380.0 }, "eval_runtime": 5.0579, "eval_samples_per_second": 16.014, "eval_steps_per_second": 2.175, "eval_weighted avg": { "f1-score": 0.8798131143596267, "precision": 0.8787439244541853, "recall": 0.8864211105002748, "support": 36380.0 }, "step": 162 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.6401137980085349, "precision": 0.6181318681318682, "recall": 0.6637168141592921, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.7918781725888324, "precision": 0.6666666666666666, "recall": 0.975, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8866442199775534, "precision": 0.93935790725327, "recall": 0.8395324123273114, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6254390633315594, "precision": 0.6255056418990845, "recall": 0.6253724989357173, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.76480605487228, "precision": 0.6205096714768191, "recall": 0.9965483234714004, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9049205625504156, "precision": 0.9449897450922942, "recall": 0.868111163447951, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988758992805756, "precision": 0.9995500562429697, "recall": 0.9982026510896428, "support": 13353.0 }, "eval_accuracy": 0.8894997251236944, "eval_loss": 0.318760484457016, "eval_macro avg": { "f1-score": 0.8018111100871074, "precision": 0.7735302223947104, "recall": 0.852354837633045, "support": 36380.0 }, "eval_runtime": 5.0862, "eval_samples_per_second": 15.925, "eval_steps_per_second": 2.163, "eval_weighted avg": { "f1-score": 0.8920666591013163, "precision": 0.9012548868310761, "recall": 0.8894997251236944, "support": 36380.0 }, "step": 243 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.7034883720930232, "precision": 0.6934097421203438, "recall": 0.7138643067846607, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.8958990536277602, "precision": 0.9044585987261147, "recall": 0.8875, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9051172707889126, "precision": 0.9080213903743316, "recall": 0.9022316684378321, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6932699265998139, "precision": 0.6739698492462312, "recall": 0.7137079608343976, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8904009720534629, "precision": 0.877815045519885, "recall": 0.903353057199211, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9127480293558032, "precision": 0.9218310342461052, "recall": 0.9038422717179194, "support": 14861.0 }, "eval_O": { "f1-score": 0.9981619715668254, "precision": 0.9999248459341651, "recall": 0.9964053021792856, "support": 13353.0 }, "eval_accuracy": 0.9113523914238593, "eval_loss": 0.2517726719379425, "eval_macro avg": { "f1-score": 0.8570122280122289, "precision": 0.8542043580238825, "recall": 0.8601292238790438, "support": 36380.0 }, "eval_runtime": 5.062, "eval_samples_per_second": 16.002, "eval_steps_per_second": 2.173, "eval_weighted avg": { "f1-score": 0.9122886837716081, "precision": 0.9134709768686261, "recall": 0.9113523914238593, "support": 36380.0 }, "step": 324 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.6590538336052201, "precision": 0.7372262773722628, "recall": 0.5958702064896755, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9078947368421054, "precision": 0.9583333333333334, "recall": 0.8625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9066802651708312, "precision": 0.8715686274509804, "recall": 0.944739638682253, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6513589503280225, "precision": 0.7243355914538823, "recall": 0.5917411664538101, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8981975120588981, "precision": 0.9256933542647828, "recall": 0.8722879684418146, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9158848058378942, "precision": 0.8877170824123777, "recall": 0.9458986609245676, "support": 14861.0 }, "eval_O": { "f1-score": 0.9994384336041331, "precision": 0.9992513849378649, "recall": 0.9996255523103422, "support": 13353.0 }, "eval_accuracy": 0.9121220450797142, "eval_loss": 0.2739432156085968, "eval_macro avg": { "f1-score": 0.8483583624924435, "precision": 0.8720179501750691, "recall": 0.8303804561860663, "support": 36380.0 }, "eval_runtime": 5.0932, "eval_samples_per_second": 15.904, "eval_steps_per_second": 2.16, "eval_weighted avg": { "f1-score": 0.9087400479269403, "precision": 0.9081638580455985, "recall": 0.9121220450797142, "support": 36380.0 }, "step": 405 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.6675461741424802, "precision": 0.60381861575179, "recall": 0.7463126843657817, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.887608069164265, "precision": 0.8235294117647058, "recall": 0.9625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8737316798196167, "precision": 0.9303721488595438, "recall": 0.8235919234856536, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6529236868186323, "precision": 0.6109050445103857, "recall": 0.7011494252873564, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8689320388349515, "precision": 0.7863418530351438, "recall": 0.9709072978303748, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.8937734066010992, "precision": 0.9312910284463894, "recall": 0.8591615638247763, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988009592326139, "precision": 0.9994750656167979, "recall": 0.9981277615517112, "support": 13353.0 }, "eval_accuracy": 0.8944749862561847, "eval_loss": 0.37784042954444885, "eval_macro avg": { "f1-score": 0.8347594306590942, "precision": 0.8122475954263937, "recall": 0.8659643794779507, "support": 36380.0 }, "eval_runtime": 5.0734, "eval_samples_per_second": 15.966, "eval_steps_per_second": 2.168, "eval_weighted avg": { "f1-score": 0.8971820344797716, "precision": 0.9033144340485707, "recall": 0.8944749862561847, "support": 36380.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 21.778270721435547, "learning_rate": 1.7530864197530865e-05, "loss": 0.2481, "step": 500 }, { "epoch": 7.0, "eval_B-Claim": { "f1-score": 0.6489859594383774, "precision": 0.6887417218543046, "recall": 0.6135693215339233, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.8852459016393444, "precision": 0.9310344827586207, "recall": 0.84375, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9011898603207449, "precision": 0.8780241935483871, "recall": 0.9256110520722636, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.626564114338193, "precision": 0.6800398704211313, "recall": 0.5808854831843338, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8813559322033898, "precision": 0.9049350649350649, "recall": 0.8589743589743589, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9073595340161655, "precision": 0.8831698305516626, "recall": 0.9329116479375547, "support": 14861.0 }, "eval_O": { "f1-score": 0.9981994148098132, "precision": 1.0, "recall": 0.9964053021792856, "support": 13353.0 }, "eval_accuracy": 0.9030786146234194, "eval_loss": 0.4109443426132202, "eval_macro avg": { "f1-score": 0.8355572452522898, "precision": 0.8522778805813102, "recall": 0.8217295951259599, "support": 36380.0 }, "eval_runtime": 5.0454, "eval_samples_per_second": 16.054, "eval_steps_per_second": 2.18, "eval_weighted avg": { "f1-score": 0.9003265559019435, "precision": 0.8992988510674564, "recall": 0.9030786146234194, "support": 36380.0 }, "step": 567 }, { "epoch": 8.0, "eval_B-Claim": { "f1-score": 0.7029972752043596, "precision": 0.6531645569620254, "recall": 0.7610619469026548, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9134328358208955, "precision": 0.8742857142857143, "recall": 0.95625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8914728682170544, "precision": 0.930635838150289, "recall": 0.8554729011689692, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6790487421383647, "precision": 0.6307046367287331, "recall": 0.7354193273733504, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8939393939393939, "precision": 0.836340206185567, "recall": 0.9600591715976331, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.8993180625983564, "precision": 0.9362166885102665, "recall": 0.8652176838705337, "support": 14861.0 }, "eval_O": { "f1-score": 0.9979819119515658, "precision": 0.9960462513987318, "recall": 0.9999251104620684, "support": 13353.0 }, "eval_accuracy": 0.902363936228697, "eval_loss": 0.4763801097869873, "eval_macro avg": { "f1-score": 0.8540272985528559, "precision": 0.8367705560316182, "recall": 0.8762008773393156, "support": 36380.0 }, "eval_runtime": 5.0791, "eval_samples_per_second": 15.948, "eval_steps_per_second": 2.166, "eval_weighted avg": { "f1-score": 0.9048169208096876, "precision": 0.9101018951943243, "recall": 0.902363936228697, "support": 36380.0 }, "step": 648 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1146754059192000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }