diff --git "a/checkpoint-2430/trainer_state.json" "b/checkpoint-2430/trainer_state.json" --- "a/checkpoint-2430/trainer_state.json" +++ "b/checkpoint-2430/trainer_state.json" @@ -11,1918 +11,1918 @@ { "epoch": 1.0, "eval_B-Claim": { - "f1-score": 0.18764302059496568, - "precision": 0.3416666666666667, - "recall": 0.12933753943217666, - "support": 317.0 + "f1-score": 0.17153996101364524, + "precision": 0.25287356321839083, + "recall": 0.12979351032448377, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.19540229885057472, - "precision": 0.8947368421052632, - "recall": 0.10967741935483871, - "support": 155.0 + "f1-score": 0.08383233532934131, + "precision": 1.0, + "recall": 0.04375, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8476339053562143, - "precision": 0.740909090909091, - "recall": 0.9902794653705954, - "support": 823.0 + "f1-score": 0.8469620831429876, + "precision": 0.7427884615384616, + "recall": 0.9851222104144527, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.4315998169894769, - "precision": 0.6394035246272028, - "recall": 0.3257366482504604, - "support": 4344.0 + "f1-score": 0.5420081967213116, + "precision": 0.6803858520900321, + "recall": 0.4504044274159217, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.7779799818016379, - "precision": 0.7493426818580193, - "recall": 0.8088930936613056, - "support": 2114.0 + "f1-score": 0.8343351767251743, + "precision": 0.8141717503519474, + "recall": 0.8555226824457594, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.8948683715038855, - "precision": 0.8376057421174058, - "recall": 0.9605350187403542, - "support": 13607.0 + "f1-score": 0.9004323458767015, + "precision": 0.8590809093131264, + "recall": 0.9459659511472983, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9980147987727848, - "precision": 0.9970251509961237, - "recall": 0.9990064131514769, - "support": 11071.0 + "f1-score": 0.9988389078242631, + "precision": 0.9991008541885209, + "recall": 0.9985770987793006, + "support": 13353.0 }, - "eval_accuracy": 0.8673183065585397, - "eval_loss": 0.3193369209766388, + "eval_accuracy": 0.8856789444749863, + "eval_loss": 0.28347474336624146, "eval_macro avg": { - "f1-score": 0.6190203134099342, - "precision": 0.7429556713256817, - "recall": 0.6176379425658869, - "support": 32431.0 - }, - "eval_runtime": 4.9083, - "eval_samples_per_second": 16.299, - "eval_steps_per_second": 2.037, + "f1-score": 0.6254212866619178, + "precision": 0.76405734152864, + "recall": 0.629876554361031, + "support": 36380.0 + }, + "eval_runtime": 5.0448, + "eval_samples_per_second": 16.056, + "eval_steps_per_second": 2.18, "eval_weighted avg": { - "f1-score": 0.8489527906249091, - "precision": 0.8526967051825685, - "recall": 0.8673183065585397, - "support": 32431.0 + "f1-score": 0.8748148382495958, + "precision": 0.8768575102351055, + "recall": 0.8856789444749863, + "support": 36380.0 }, "step": 81 }, { "epoch": 2.0, "eval_B-Claim": { - "f1-score": 0.583941605839416, - "precision": 0.6926406926406926, - "recall": 0.5047318611987381, - "support": 317.0 + "f1-score": 0.5137931034482759, + "precision": 0.6182572614107884, + "recall": 0.43952802359882004, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.8117647058823529, - "precision": 0.745945945945946, - "recall": 0.8903225806451613, - "support": 155.0 + "f1-score": 0.7622641509433963, + "precision": 0.9619047619047619, + "recall": 0.63125, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8943661971830986, - "precision": 0.8649262202043133, - "recall": 0.9258809234507898, - "support": 823.0 + "f1-score": 0.8853910477127397, + "precision": 0.8241758241758241, + "recall": 0.9564293304994687, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6197740770932805, - "precision": 0.6271506009898656, - "recall": 0.6125690607734806, - "support": 4344.0 + "f1-score": 0.5413588470388762, + "precision": 0.6279853891542568, + "recall": 0.47573435504469985, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8298737427776589, - "precision": 0.7577178585384916, - "recall": 0.9172185430463576, - "support": 2114.0 + "f1-score": 0.7949260042283298, + "precision": 0.856492027334852, + "recall": 0.7416173570019724, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9013697613125952, - "precision": 0.9106660666066607, - "recall": 0.8922613360770192, - "support": 13607.0 + "f1-score": 0.9007064868336545, + "precision": 0.8614779777627618, + "recall": 0.9436780835744566, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9961487925331883, - "precision": 0.9993636363636363, - "recall": 0.9929545659831993, - "support": 11071.0 + "f1-score": 0.9988389947942025, + "precision": 0.9990260713215463, + "recall": 0.9986519883172321, + "support": 13353.0 }, - "eval_accuracy": 0.887854213561099, - "eval_loss": 0.26407390832901, + "eval_accuracy": 0.8864211105002748, + "eval_loss": 0.2809496521949768, "eval_macro avg": { - "f1-score": 0.8053198403745129, - "precision": 0.7997730030413723, - "recall": 0.8194198387392494, - "support": 32431.0 - }, - "eval_runtime": 4.9329, - "eval_samples_per_second": 16.218, - "eval_steps_per_second": 2.027, + "f1-score": 0.7710398049999251, + "precision": 0.8213313304378275, + "recall": 0.7409841625766643, + "support": 36380.0 + }, + "eval_runtime": 5.0579, + "eval_samples_per_second": 16.014, + "eval_steps_per_second": 2.175, "eval_weighted avg": { - "f1-score": 0.8876368199002541, - "precision": 0.8889201304482091, - "recall": 0.887854213561099, - "support": 32431.0 + "f1-score": 0.8798131143596267, + "precision": 0.8787439244541853, + "recall": 0.8864211105002748, + "support": 36380.0 }, "step": 162 }, { "epoch": 3.0, "eval_B-Claim": { - "f1-score": 0.5895522388059701, - "precision": 0.7214611872146118, - "recall": 0.49842271293375395, - "support": 317.0 + "f1-score": 0.6401137980085349, + "precision": 0.6181318681318682, + "recall": 0.6637168141592921, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.8668941979522183, - "precision": 0.9202898550724637, - "recall": 0.8193548387096774, - "support": 155.0 + "f1-score": 0.7918781725888324, + "precision": 0.6666666666666666, + "recall": 0.975, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8929178470254956, - "precision": 0.8365180467091295, - "recall": 0.9574726609963548, - "support": 823.0 + "f1-score": 0.8866442199775534, + "precision": 0.93935790725327, + "recall": 0.8395324123273114, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6548140728684112, - "precision": 0.7178149876475433, - "recall": 0.6019797421731123, - "support": 4344.0 + "f1-score": 0.6254390633315594, + "precision": 0.6255056418990845, + "recall": 0.6253724989357173, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8815622622368754, - "precision": 0.9502460360852925, - "recall": 0.8221381267738883, - "support": 2114.0 + "f1-score": 0.76480605487228, + "precision": 0.6205096714768191, + "recall": 0.9965483234714004, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9169943670953343, - "precision": 0.8852257181942544, - "recall": 0.9511280958330272, - "support": 13607.0 + "f1-score": 0.9049205625504156, + "precision": 0.9449897450922942, + "recall": 0.868111163447951, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9984170774727511, - "precision": 0.9998188405797102, - "recall": 0.9970192394544305, - "support": 11071.0 + "f1-score": 0.9988758992805756, + "precision": 0.9995500562429697, + "recall": 0.9982026510896428, + "support": 13353.0 }, - "eval_accuracy": 0.9067250470229101, - "eval_loss": 0.25323203206062317, + "eval_accuracy": 0.8894997251236944, + "eval_loss": 0.318760484457016, "eval_macro avg": { - "f1-score": 0.8287360090652937, - "precision": 0.8616249530718579, - "recall": 0.8067879166963207, - "support": 32431.0 - }, - "eval_runtime": 4.9025, - "eval_samples_per_second": 16.318, - "eval_steps_per_second": 2.04, + "f1-score": 0.8018111100871074, + "precision": 0.7735302223947104, + "recall": 0.852354837633045, + "support": 36380.0 + }, + "eval_runtime": 5.0862, + "eval_samples_per_second": 15.925, + "eval_steps_per_second": 2.163, "eval_weighted avg": { - "f1-score": 0.9033110548907601, - "precision": 0.9034896128542845, - "recall": 0.9067250470229101, - "support": 32431.0 + "f1-score": 0.8920666591013163, + "precision": 0.9012548868310761, + "recall": 0.8894997251236944, + "support": 36380.0 }, "step": 243 }, { "epoch": 4.0, "eval_B-Claim": { - "f1-score": 0.6806136680613668, - "precision": 0.61, - "recall": 0.7697160883280757, - "support": 317.0 + "f1-score": 0.7034883720930232, + "precision": 0.6934097421203438, + "recall": 0.7138643067846607, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.8861538461538462, - "precision": 0.8470588235294118, - "recall": 0.9290322580645162, - "support": 155.0 + "f1-score": 0.8958990536277602, + "precision": 0.9044585987261147, + "recall": 0.8875, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8687258687258688, - "precision": 0.9233926128590971, - "recall": 0.8201701093560145, - "support": 823.0 + "f1-score": 0.9051172707889126, + "precision": 0.9080213903743316, + "recall": 0.9022316684378321, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.656155854589275, - "precision": 0.5819380121125757, - "recall": 0.7520718232044199, - "support": 4344.0 + "f1-score": 0.6932699265998139, + "precision": 0.6739698492462312, + "recall": 0.7137079608343976, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8818897637795275, - "precision": 0.8408408408408409, - "recall": 0.9271523178807947, - "support": 2114.0 + "f1-score": 0.8904009720534629, + "precision": 0.877815045519885, + "recall": 0.903353057199211, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.8829758349171871, - "precision": 0.9349433218334154, - "recall": 0.836481222899978, - "support": 13607.0 + "f1-score": 0.9127480293558032, + "precision": 0.9218310342461052, + "recall": 0.9038422717179194, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9971017118014671, - "precision": 0.9998183634547271, - "recall": 0.9943997832174148, - "support": 11071.0 + "f1-score": 0.9981619715668254, + "precision": 0.9999248459341651, + "recall": 0.9964053021792856, + "support": 13353.0 }, - "eval_accuracy": 0.8843698930036077, - "eval_loss": 0.2791365385055542, + "eval_accuracy": 0.9113523914238593, + "eval_loss": 0.2517726719379425, "eval_macro avg": { - "f1-score": 0.8362309354326484, - "precision": 0.8197131392328668, - "recall": 0.8612890861358877, - "support": 32431.0 - }, - "eval_runtime": 4.9234, - "eval_samples_per_second": 16.249, - "eval_steps_per_second": 2.031, + "f1-score": 0.8570122280122289, + "precision": 0.8542043580238825, + "recall": 0.8601292238790438, + "support": 36380.0 + }, + "eval_runtime": 5.062, + "eval_samples_per_second": 16.002, + "eval_steps_per_second": 2.173, "eval_weighted avg": { - "f1-score": 0.889158243622726, - "precision": 0.8997827813567029, - "recall": 0.8843698930036077, - "support": 32431.0 + "f1-score": 0.9122886837716081, + "precision": 0.9134709768686261, + "recall": 0.9113523914238593, + "support": 36380.0 }, "step": 324 }, { "epoch": 5.0, "eval_B-Claim": { - "f1-score": 0.6078799249530958, - "precision": 0.75, - "recall": 0.5110410094637224, - "support": 317.0 + "f1-score": 0.6590538336052201, + "precision": 0.7372262773722628, + "recall": 0.5958702064896755, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.8971962616822431, - "precision": 0.8674698795180723, - "recall": 0.9290322580645162, - "support": 155.0 + "f1-score": 0.9078947368421054, + "precision": 0.9583333333333334, + "recall": 0.8625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.893739230327398, - "precision": 0.8474945533769063, - "recall": 0.945321992709599, - "support": 823.0 + "f1-score": 0.9066802651708312, + "precision": 0.8715686274509804, + "recall": 0.944739638682253, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6119813135476778, - "precision": 0.7590320381731425, - "recall": 0.5126611418047882, - "support": 4344.0 + "f1-score": 0.6513589503280225, + "precision": 0.7243355914538823, + "recall": 0.5917411664538101, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8889393595275948, - "precision": 0.854958497160332, - "recall": 0.9257332071901608, - "support": 2114.0 + "f1-score": 0.8981975120588981, + "precision": 0.9256933542647828, + "recall": 0.8722879684418146, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9137743463765573, - "precision": 0.874462654486835, - "recall": 0.956786947894466, - "support": 13607.0 + "f1-score": 0.9158848058378942, + "precision": 0.8877170824123777, + "recall": 0.9458986609245676, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9976913675252366, - "precision": 1.0, - "recall": 0.9953933700659381, - "support": 11071.0 + "f1-score": 0.9994384336041331, + "precision": 0.9992513849378649, + "recall": 0.9996255523103422, + "support": 13353.0 }, - "eval_accuracy": 0.9036724121982054, - "eval_loss": 0.3500836193561554, + "eval_accuracy": 0.9121220450797142, + "eval_loss": 0.2739432156085968, "eval_macro avg": { - "f1-score": 0.8301716862771149, - "precision": 0.8504882318164697, - "recall": 0.8251385610275985, - "support": 32431.0 - }, - "eval_runtime": 4.9239, - "eval_samples_per_second": 16.247, - "eval_steps_per_second": 2.031, + "f1-score": 0.8483583624924435, + "precision": 0.8720179501750691, + "recall": 0.8303804561860663, + "support": 36380.0 + }, + "eval_runtime": 5.0932, + "eval_samples_per_second": 15.904, + "eval_steps_per_second": 2.16, "eval_weighted avg": { - "f1-score": 0.8968007164885599, - "precision": 0.8986502613295283, - "recall": 0.9036724121982054, - "support": 32431.0 + "f1-score": 0.9087400479269403, + "precision": 0.9081638580455985, + "recall": 0.9121220450797142, + "support": 36380.0 }, "step": 405 }, { "epoch": 6.0, "eval_B-Claim": { - "f1-score": 0.672077922077922, - "precision": 0.6923076923076923, - "recall": 0.6529968454258676, - "support": 317.0 + "f1-score": 0.6675461741424802, + "precision": 0.60381861575179, + "recall": 0.7463126843657817, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.8910256410256411, - "precision": 0.8853503184713376, - "recall": 0.896774193548387, - "support": 155.0 + "f1-score": 0.887608069164265, + "precision": 0.8235294117647058, + "recall": 0.9625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8907563025210083, - "precision": 0.8801897983392646, - "recall": 0.9015795868772782, - "support": 823.0 + "f1-score": 0.8737316798196167, + "precision": 0.9303721488595438, + "recall": 0.8235919234856536, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6666666666666667, - "precision": 0.6998228296633764, - "recall": 0.6365101289134438, - "support": 4344.0 + "f1-score": 0.6529236868186323, + "precision": 0.6109050445103857, + "recall": 0.7011494252873564, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8853370396108408, - "precision": 0.867453472537449, - "recall": 0.9039735099337748, - "support": 2114.0 + "f1-score": 0.8689320388349515, + "precision": 0.7863418530351438, + "recall": 0.9709072978303748, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9098497495826378, - "precision": 0.8987595898759589, - "recall": 0.9212170206511354, - "support": 13607.0 + "f1-score": 0.8937734066010992, + "precision": 0.9312910284463894, + "recall": 0.8591615638247763, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.998190209030857, - "precision": 1.0, - "recall": 0.9963869569144612, - "support": 11071.0 + "f1-score": 0.9988009592326139, + "precision": 0.9994750656167979, + "recall": 0.9981277615517112, + "support": 13353.0 }, - "eval_accuracy": 0.9043816101877833, - "eval_loss": 0.30672115087509155, + "eval_accuracy": 0.8944749862561847, + "eval_loss": 0.37784042954444885, "eval_macro avg": { - "f1-score": 0.8448433615022248, - "precision": 0.8462691001707254, - "recall": 0.8442054631806213, - "support": 32431.0 - }, - "eval_runtime": 4.9018, - "eval_samples_per_second": 16.321, - "eval_steps_per_second": 2.04, + "f1-score": 0.8347594306590942, + "precision": 0.8122475954263937, + "recall": 0.8659643794779507, + "support": 36380.0 + }, + "eval_runtime": 5.0734, + "eval_samples_per_second": 15.966, + "eval_steps_per_second": 2.168, "eval_weighted avg": { - "f1-score": 0.9029367568413356, - "precision": 0.9020793621628027, - "recall": 0.9043816101877833, - "support": 32431.0 + "f1-score": 0.8971820344797716, + "precision": 0.9033144340485707, + "recall": 0.8944749862561847, + "support": 36380.0 }, "step": 486 }, { "epoch": 6.17, - "grad_norm": 0.43724608421325684, + "grad_norm": 21.778270721435547, "learning_rate": 1.7530864197530865e-05, - "loss": 0.2434, + "loss": 0.2481, "step": 500 }, { "epoch": 7.0, "eval_B-Claim": { - "f1-score": 0.6763285024154589, - "precision": 0.6907894736842105, - "recall": 0.6624605678233438, - "support": 317.0 + "f1-score": 0.6489859594383774, + "precision": 0.6887417218543046, + "recall": 0.6135693215339233, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.8952380952380952, - "precision": 0.88125, - "recall": 0.9096774193548387, - "support": 155.0 + "f1-score": 0.8852459016393444, + "precision": 0.9310344827586207, + "recall": 0.84375, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8941034897713597, - "precision": 0.8855780691299165, - "recall": 0.9027946537059538, - "support": 823.0 + "f1-score": 0.9011898603207449, + "precision": 0.8780241935483871, + "recall": 0.9256110520722636, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6960393719240684, - "precision": 0.7088305489260143, - "recall": 0.6837016574585635, - "support": 4344.0 + "f1-score": 0.626564114338193, + "precision": 0.6800398704211313, + "recall": 0.5808854831843338, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9140037593984962, - "precision": 0.9080298786181139, - "recall": 0.9200567644276254, - "support": 2114.0 + "f1-score": 0.8813559322033898, + "precision": 0.9049350649350649, + "recall": 0.8589743589743589, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9164355568530141, - "precision": 0.9101253895774444, - "recall": 0.9228338355258323, - "support": 13607.0 + "f1-score": 0.9073595340161655, + "precision": 0.8831698305516626, + "recall": 0.9329116479375547, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9967376529225193, + "f1-score": 0.9981994148098132, "precision": 1.0, - "recall": 0.9934965224460302, - "support": 11071.0 + "recall": 0.9964053021792856, + "support": 13353.0 }, - "eval_accuracy": 0.9116277635595572, - "eval_loss": 0.34577852487564087, + "eval_accuracy": 0.9030786146234194, + "eval_loss": 0.4109443426132202, "eval_macro avg": { - "f1-score": 0.855555204074716, - "precision": 0.8549433371336713, - "recall": 0.8564316315345982, - "support": 32431.0 - }, - "eval_runtime": 4.8989, - "eval_samples_per_second": 16.33, - "eval_steps_per_second": 2.041, + "f1-score": 0.8355572452522898, + "precision": 0.8522778805813102, + "recall": 0.8217295951259599, + "support": 36380.0 + }, + "eval_runtime": 5.0454, + "eval_samples_per_second": 16.054, + "eval_steps_per_second": 2.18, "eval_weighted avg": { - "f1-score": 0.9111536914902465, - "precision": 0.910801887328957, - "recall": 0.9116277635595572, - "support": 32431.0 + "f1-score": 0.9003265559019435, + "precision": 0.8992988510674564, + "recall": 0.9030786146234194, + "support": 36380.0 }, "step": 567 }, { "epoch": 8.0, "eval_B-Claim": { - "f1-score": 0.701923076923077, - "precision": 0.7133550488599348, - "recall": 0.6908517350157729, - "support": 317.0 + "f1-score": 0.7029972752043596, + "precision": 0.6531645569620254, + "recall": 0.7610619469026548, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9260450160771705, - "precision": 0.9230769230769231, - "recall": 0.9290322580645162, - "support": 155.0 + "f1-score": 0.9134328358208955, + "precision": 0.8742857142857143, + "recall": 0.95625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8985507246376812, - "precision": 0.8931572629051621, - "recall": 0.9040097205346294, - "support": 823.0 + "f1-score": 0.8914728682170544, + "precision": 0.930635838150289, + "recall": 0.8554729011689692, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6915371329879102, - "precision": 0.6917760884588804, - "recall": 0.6912983425414365, - "support": 4344.0 + "f1-score": 0.6790487421383647, + "precision": 0.6307046367287331, + "recall": 0.7354193273733504, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9147286821705426, - "precision": 0.9085394307046197, - "recall": 0.9210028382213813, - "support": 2114.0 + "f1-score": 0.8939393939393939, + "precision": 0.836340206185567, + "recall": 0.9600591715976331, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.912640635340834, - "precision": 0.9131778382753293, - "recall": 0.9121040640846623, - "support": 13607.0 + "f1-score": 0.8993180625983564, + "precision": 0.9362166885102665, + "recall": 0.8652176838705337, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9993222177036736, - "precision": 0.9998191681735985, - "recall": 0.9988257609971999, - "support": 11071.0 + "f1-score": 0.9979819119515658, + "precision": 0.9960462513987318, + "recall": 0.9999251104620684, + "support": 13353.0 }, - "eval_accuracy": 0.9104252104467947, - "eval_loss": 0.3896510601043701, + "eval_accuracy": 0.902363936228697, + "eval_loss": 0.4763801097869873, "eval_macro avg": { - "f1-score": 0.8635353551201269, - "precision": 0.863271680064921, - "recall": 0.8638749599227997, - "support": 32431.0 - }, - "eval_runtime": 4.9786, - "eval_samples_per_second": 16.069, - "eval_steps_per_second": 2.009, + "f1-score": 0.8540272985528559, + "precision": 0.8367705560316182, + "recall": 0.8762008773393156, + "support": 36380.0 + }, + "eval_runtime": 5.0791, + "eval_samples_per_second": 15.948, + "eval_steps_per_second": 2.166, "eval_weighted avg": { - "f1-score": 0.9103982293589928, - "precision": 0.9103825180220693, - "recall": 0.9104252104467947, - "support": 32431.0 + "f1-score": 0.9048169208096876, + "precision": 0.9101018951943243, + "recall": 0.902363936228697, + "support": 36380.0 }, "step": 648 }, { "epoch": 9.0, "eval_B-Claim": { - "f1-score": 0.6862745098039215, - "precision": 0.6171284634760705, - "recall": 0.7728706624605678, - "support": 317.0 + "f1-score": 0.6884779516358464, + "precision": 0.6648351648351648, + "recall": 0.7138643067846607, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9119496855345912, - "precision": 0.8895705521472392, - "recall": 0.9354838709677419, - "support": 155.0 + "f1-score": 0.8860759493670887, + "precision": 0.8974358974358975, + "recall": 0.875, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8690095846645367, - "precision": 0.9164420485175202, - "recall": 0.8262454434993924, - "support": 823.0 + "f1-score": 0.8984416980118215, + "precision": 0.908695652173913, + "recall": 0.8884165781083954, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6841291120965247, - "precision": 0.619652531290865, - "recall": 0.7635819521178637, - "support": 4344.0 + "f1-score": 0.6685829873309911, + "precision": 0.6687965921192758, + "recall": 0.6683695189442316, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9025385312783318, - "precision": 0.8664055700609226, - "recall": 0.9418164616840113, - "support": 2114.0 + "f1-score": 0.8819011041766683, + "precision": 0.8592142188961647, + "recall": 0.9058185404339251, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.8949834831374357, - "precision": 0.9374748531423513, - "recall": 0.8561769677371941, - "support": 13607.0 + "f1-score": 0.9072658065820057, + "precision": 0.9083367057871308, + "recall": 0.9061974295134917, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9989151071331707, - "precision": 0.9998190209030857, - "recall": 0.9980128263029536, - "support": 11071.0 + "f1-score": 0.9971465044679734, + "precision": 0.9998494089300505, + "recall": 0.9944581741930653, + "support": 13353.0 }, - "eval_accuracy": 0.8965804323024267, - "eval_loss": 0.4196818470954895, + "eval_accuracy": 0.9054700384826828, + "eval_loss": 0.43875375390052795, "eval_macro avg": { - "f1-score": 0.8496857162355017, - "precision": 0.8352132913625792, - "recall": 0.8705983121099606, - "support": 32431.0 - }, - "eval_runtime": 4.9285, - "eval_samples_per_second": 16.232, - "eval_steps_per_second": 2.029, + "f1-score": 0.8468417145103421, + "precision": 0.843880520025371, + "recall": 0.8503035068539672, + "support": 36380.0 + }, + "eval_runtime": 5.0863, + "eval_samples_per_second": 15.925, + "eval_steps_per_second": 2.163, "eval_weighted avg": { - "f1-score": 0.9000940098435676, - "precision": 0.9076597589527613, - "recall": 0.8965804323024267, - "support": 32431.0 + "f1-score": 0.9056589487317133, + "precision": 0.9059462683069767, + "recall": 0.9054700384826828, + "support": 36380.0 }, "step": 729 }, { "epoch": 10.0, "eval_B-Claim": { - "f1-score": 0.6905537459283387, - "precision": 0.7138047138047138, - "recall": 0.668769716088328, - "support": 317.0 + "f1-score": 0.6910569105691057, + "precision": 0.6390977443609023, + "recall": 0.7522123893805309, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9142857142857143, - "precision": 0.9, - "recall": 0.9290322580645162, - "support": 155.0 + "f1-score": 0.9046153846153846, + "precision": 0.8909090909090909, + "recall": 0.91875, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.894484412470024, - "precision": 0.8828402366863906, - "recall": 0.9064398541919806, - "support": 823.0 + "f1-score": 0.8888888888888888, + "precision": 0.9213226909920182, + "recall": 0.8586609989373007, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.681682755333095, - "precision": 0.7066963182604399, - "recall": 0.6583793738489871, - "support": 4344.0 + "f1-score": 0.6645316253002403, + "precision": 0.6271250472232717, + "recall": 0.7066836951894423, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9083702494754021, - "precision": 0.895632183908046, - "recall": 0.9214758751182592, - "support": 2114.0 + "f1-score": 0.886980737990253, + "precision": 0.8377904427882508, + "recall": 0.9423076923076923, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9129089784183134, - "precision": 0.904253785147801, - "recall": 0.9217314617476299, - "support": 13607.0 + "f1-score": 0.8983343700324833, + "precision": 0.9233501456276195, + "recall": 0.8746383150528229, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9983716301791207, - "precision": 0.999909395669113, - "recall": 0.9968385873001535, - "support": 11071.0 + "f1-score": 0.9975225225225225, + "precision": 1.0, + "recall": 0.9950572904965176, + "support": 13353.0 }, - "eval_accuracy": 0.9092534920292313, - "eval_loss": 0.5311424136161804, + "eval_accuracy": 0.8995601979109401, + "eval_loss": 0.5381875038146973, "eval_macro avg": { - "f1-score": 0.8572367837271441, - "precision": 0.8575909476395005, - "recall": 0.857523875194265, - "support": 32431.0 - }, - "eval_runtime": 4.8937, - "eval_samples_per_second": 16.348, - "eval_steps_per_second": 2.043, + "f1-score": 0.8474186342741253, + "precision": 0.8342278802715933, + "recall": 0.8640443401949011, + "support": 36380.0 + }, + "eval_runtime": 5.099, + "eval_samples_per_second": 15.885, + "eval_steps_per_second": 2.157, "eval_weighted avg": { - "f1-score": 0.9081813659228948, - "precision": 0.9074585127295745, - "recall": 0.9092534920292313, - "support": 32431.0 + "f1-score": 0.9017669881051834, + "precision": 0.9056169116577473, + "recall": 0.8995601979109401, + "support": 36380.0 }, "step": 810 }, { "epoch": 11.0, "eval_B-Claim": { - "f1-score": 0.680921052631579, - "precision": 0.711340206185567, - "recall": 0.6529968454258676, - "support": 317.0 + "f1-score": 0.7103825136612023, + "precision": 0.6615776081424937, + "recall": 0.7669616519174042, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9009584664536742, - "precision": 0.8924050632911392, - "recall": 0.9096774193548387, - "support": 155.0 + "f1-score": 0.9032258064516129, + "precision": 0.850828729281768, + "recall": 0.9625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8942020322773461, - "precision": 0.88, - "recall": 0.9088699878493317, - "support": 823.0 + "f1-score": 0.8955223880597015, + "precision": 0.9331797235023042, + "recall": 0.8607863974495218, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6838570400575678, - "precision": 0.713820731096645, - "recall": 0.6563075506445673, - "support": 4344.0 + "f1-score": 0.685932169375878, + "precision": 0.6488230827638573, + "recall": 0.7275436355896125, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.902502979737783, - "precision": 0.9096588178760211, - "recall": 0.8954588457899716, - "support": 2114.0 + "f1-score": 0.8921899617375647, + "precision": 0.820703933747412, + "recall": 0.9773175542406312, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9134434303925829, - "precision": 0.9004641199571581, - "recall": 0.9268023811273609, - "support": 13607.0 + "f1-score": 0.9036754507628294, + "precision": 0.932183990271121, + "recall": 0.8768588924029338, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9988699543461556, - "precision": 0.999728555917481, - "recall": 0.9980128263029536, - "support": 11071.0 + "f1-score": 0.9971084156370874, + "precision": 1.0, + "recall": 0.9942335055792706, + "support": 13353.0 }, - "eval_accuracy": 0.9096235083716198, - "eval_loss": 0.49213308095932007, + "eval_accuracy": 0.9051951621770203, + "eval_loss": 0.5869538187980652, "eval_macro avg": { - "f1-score": 0.8535364222709555, - "precision": 0.8582024991891446, - "recall": 0.8497322652135558, - "support": 32431.0 - }, - "eval_runtime": 4.9244, - "eval_samples_per_second": 16.246, - "eval_steps_per_second": 2.031, + "f1-score": 0.8554338150979823, + "precision": 0.8353281525298509, + "recall": 0.880885948168482, + "support": 36380.0 + }, + "eval_runtime": 5.0923, + "eval_samples_per_second": 15.906, + "eval_steps_per_second": 2.16, "eval_weighted avg": { - "f1-score": 0.9083194817194259, - "precision": 0.9075428987655072, - "recall": 0.9096235083716198, - "support": 32431.0 + "f1-score": 0.9071963301332511, + "precision": 0.9114149044954519, + "recall": 0.9051951621770203, + "support": 36380.0 }, "step": 891 }, { "epoch": 12.0, "eval_B-Claim": { - "f1-score": 0.6577181208053691, - "precision": 0.7025089605734767, - "recall": 0.6182965299684543, - "support": 317.0 + "f1-score": 0.6758409785932722, + "precision": 0.7015873015873015, + "recall": 0.6519174041297935, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.8978328173374612, - "precision": 0.8630952380952381, - "recall": 0.9354838709677419, - "support": 155.0 + "f1-score": 0.916923076923077, + "precision": 0.9030303030303031, + "recall": 0.93125, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8902147971360382, - "precision": 0.8745603751465416, - "recall": 0.9064398541919806, - "support": 823.0 + "f1-score": 0.8989473684210527, + "precision": 0.8905109489051095, + "recall": 0.9075451647183846, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6518804243008679, - "precision": 0.6842105263157895, - "recall": 0.6224677716390423, - "support": 4344.0 + "f1-score": 0.6533197602081212, + "precision": 0.6970794110547912, + "recall": 0.6147296722009365, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8919829328542555, - "precision": 0.8490808037622916, - "recall": 0.9394512771996215, - "support": 2114.0 + "f1-score": 0.9200480192076831, + "precision": 0.8965839962564343, + "recall": 0.9447731755424064, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9075452422650322, - "precision": 0.9011665821317296, - "recall": 0.9140148453002132, - "support": 13607.0 + "f1-score": 0.9082653940478558, + "precision": 0.8948605759811924, + "recall": 0.922077922077922, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9985526910900046, - "precision": 1.0, - "recall": 0.9971095655315689, - "support": 11071.0 + "f1-score": 0.9988389947942025, + "precision": 0.9990260713215463, + "recall": 0.9986519883172321, + "support": 13353.0 }, - "eval_accuracy": 0.9020073386574574, - "eval_loss": 0.5465030670166016, + "eval_accuracy": 0.9089059923034635, + "eval_loss": 0.5580677390098572, "eval_macro avg": { - "f1-score": 0.8422467179698613, - "precision": 0.8392317837178667, - "recall": 0.8476091021140889, - "support": 32431.0 - }, - "eval_runtime": 4.8988, - "eval_samples_per_second": 16.331, - "eval_steps_per_second": 2.041, + "f1-score": 0.8531690845993235, + "precision": 0.854668372590954, + "recall": 0.852992189569525, + "support": 36380.0 + }, + "eval_runtime": 5.0574, + "eval_samples_per_second": 16.016, + "eval_steps_per_second": 2.175, "eval_weighted avg": { - "f1-score": 0.9004246846165758, - "precision": 0.8996509302731299, - "recall": 0.9020073386574574, - "support": 32431.0 + "f1-score": 0.9068749310769137, + "precision": 0.9057713940131727, + "recall": 0.9089059923034635, + "support": 36380.0 }, "step": 972 }, { "epoch": 12.35, - "grad_norm": 0.1318705528974533, + "grad_norm": 0.45772114396095276, "learning_rate": 1.506172839506173e-05, - "loss": 0.0258, + "loss": 0.0314, "step": 1000 }, { "epoch": 13.0, "eval_B-Claim": { - "f1-score": 0.6857142857142857, - "precision": 0.6551724137931034, - "recall": 0.7192429022082019, - "support": 317.0 + "f1-score": 0.6760563380281691, + "precision": 0.72, + "recall": 0.6371681415929203, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9022082018927444, - "precision": 0.8827160493827161, - "recall": 0.9225806451612903, - "support": 155.0 + "f1-score": 0.9171974522292993, + "precision": 0.935064935064935, + "recall": 0.9, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8858560794044665, - "precision": 0.9049429657794676, - "recall": 0.8675577156743621, - "support": 823.0 + "f1-score": 0.905503634475597, + "precision": 0.8852791878172589, + "recall": 0.926673751328374, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6909411894516165, - "precision": 0.6634880271243908, - "recall": 0.7207642725598526, - "support": 4344.0 + "f1-score": 0.6522588803310726, + "precision": 0.7090727318170458, + "recall": 0.6038739889314602, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9048498845265589, - "precision": 0.8840252707581228, - "recall": 0.9266792809839167, - "support": 2114.0 + "f1-score": 0.9228121927236971, + "precision": 0.9200980392156862, + "recall": 0.9255424063116371, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.906777130915062, - "precision": 0.9211464098870271, - "recall": 0.8928492687587272, - "support": 13607.0 + "f1-score": 0.9102441910145594, + "precision": 0.8896312475909033, + "recall": 0.9318350043738645, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9971466099008107, + "f1-score": 0.9992880428673137, "precision": 1.0, - "recall": 0.9943094571402764, - "support": 11071.0 + "recall": 0.9985770987793006, + "support": 13353.0 }, - "eval_accuracy": 0.9044432795781814, - "eval_loss": 0.5739177465438843, + "eval_accuracy": 0.9106102253985706, + "eval_loss": 0.5264647603034973, "eval_macro avg": { - "f1-score": 0.8533561974007922, - "precision": 0.8444987338178326, - "recall": 0.8634262203552325, - "support": 32431.0 - }, - "eval_runtime": 4.929, - "eval_samples_per_second": 16.231, - "eval_steps_per_second": 2.029, + "f1-score": 0.8547658188099584, + "precision": 0.8655923059294041, + "recall": 0.8462386273310795, + "support": 36380.0 + }, + "eval_runtime": 5.0562, + "eval_samples_per_second": 16.02, + "eval_steps_per_second": 2.176, "eval_weighted avg": { - "f1-score": 0.9058771229581195, - "precision": 0.9079380628166581, - "recall": 0.9044432795781814, - "support": 32431.0 + "f1-score": 0.9080379636257123, + "precision": 0.9070294280758862, + "recall": 0.9106102253985706, + "support": 36380.0 }, "step": 1053 }, { "epoch": 14.0, "eval_B-Claim": { - "f1-score": 0.6666666666666666, - "precision": 0.7276119402985075, - "recall": 0.6151419558359621, - "support": 317.0 + "f1-score": 0.6980609418282548, + "precision": 0.6579634464751958, + "recall": 0.7433628318584071, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9003215434083601, - "precision": 0.8974358974358975, - "recall": 0.9032258064516129, - "support": 155.0 + "f1-score": 0.9226006191950465, + "precision": 0.9141104294478528, + "recall": 0.93125, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8976470588235295, - "precision": 0.8700114025085519, - "recall": 0.9270959902794653, - "support": 823.0 + "f1-score": 0.8921568627450981, + "precision": 0.9150837988826815, + "recall": 0.8703506907545164, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6725553620303559, - "precision": 0.7317271250676773, - "recall": 0.6222375690607734, - "support": 4344.0 + "f1-score": 0.6821321785751109, + "precision": 0.6614677064587082, + "recall": 0.7041294167730949, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8921406655652585, - "precision": 0.8902496467263307, - "recall": 0.8940397350993378, - "support": 2114.0 + "f1-score": 0.9124910093502757, + "precision": 0.8880074661689221, + "recall": 0.9383629191321499, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9186321600114704, - "precision": 0.8966482401511441, - "recall": 0.9417211729257, - "support": 13607.0 + "f1-score": 0.9052323204796293, + "precision": 0.9166609175577786, + "recall": 0.894085189421977, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9977821029285294, + "f1-score": 0.9980114808839531, "precision": 1.0, - "recall": 0.995574022220215, - "support": 11071.0 + "recall": 0.9960308544896278, + "support": 13353.0 }, - "eval_accuracy": 0.9104560451419937, - "eval_loss": 0.5471230745315552, + "eval_accuracy": 0.9075865860362837, + "eval_loss": 0.6585939526557922, "eval_macro avg": { - "f1-score": 0.8493922227763101, - "precision": 0.8590977503125871, - "recall": 0.842719464553295, - "support": 32431.0 - }, - "eval_runtime": 4.9223, - "eval_samples_per_second": 16.253, - "eval_steps_per_second": 2.032, + "f1-score": 0.8586693447224812, + "precision": 0.850470537855877, + "recall": 0.8682245574899676, + "support": 36380.0 + }, + "eval_runtime": 5.0606, + "eval_samples_per_second": 16.006, + "eval_steps_per_second": 2.174, "eval_weighted avg": { - "f1-score": 0.9078809171606194, - "precision": 0.9070974660556812, - "recall": 0.9104560451419937, - "support": 32431.0 + "f1-score": 0.9086880683615599, + "precision": 0.9102351141829325, + "recall": 0.9075865860362837, + "support": 36380.0 }, "step": 1134 }, { "epoch": 15.0, "eval_B-Claim": { - "f1-score": 0.6688524590163935, - "precision": 0.6962457337883959, - "recall": 0.6435331230283912, - "support": 317.0 + "f1-score": 0.6920731707317073, + "precision": 0.7160883280757098, + "recall": 0.6696165191740413, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9061488673139159, - "precision": 0.9090909090909091, - "recall": 0.9032258064516129, - "support": 155.0 + "f1-score": 0.90282131661442, + "precision": 0.9056603773584906, + "recall": 0.9, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8902147971360382, - "precision": 0.8745603751465416, - "recall": 0.9064398541919806, - "support": 823.0 + "f1-score": 0.9076600209863588, + "precision": 0.8963730569948186, + "recall": 0.9192348565356004, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6658545498842734, - "precision": 0.7071151358344114, - "recall": 0.6291436464088398, - "support": 4344.0 + "f1-score": 0.6644877718922575, + "precision": 0.7061077844311378, + "recall": 0.6275010642826735, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8983730252298987, - "precision": 0.8956276445698167, - "recall": 0.9011352885525071, - "support": 2114.0 + "f1-score": 0.9016511127063891, + "precision": 0.8758716875871687, + "recall": 0.9289940828402367, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9134990605578841, - "precision": 0.8985002487739001, - "recall": 0.9290071286837657, - "support": 13607.0 + "f1-score": 0.9134082844161443, + "precision": 0.9005362280931206, + "recall": 0.9266536572236054, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9998645047649157, - "precision": 0.9999096657633243, - "recall": 0.999819347845723, - "support": 11071.0 + "f1-score": 0.9987253505286046, + "precision": 0.9999249305607687, + "recall": 0.9975286452482588, + "support": 13353.0 }, - "eval_accuracy": 0.9077117572692794, - "eval_loss": 0.6182886362075806, + "eval_accuracy": 0.9114623419461243, + "eval_loss": 0.6387954950332642, "eval_macro avg": { - "f1-score": 0.8489724662719027, - "precision": 0.8544356732810428, - "recall": 0.8446148850232601, - "support": 32431.0 - }, - "eval_runtime": 4.9224, - "eval_samples_per_second": 16.252, - "eval_steps_per_second": 2.032, + "f1-score": 0.854403861125126, + "precision": 0.8572231990144593, + "recall": 0.8527898321863451, + "support": 36380.0 + }, + "eval_runtime": 5.0835, + "eval_samples_per_second": 15.934, + "eval_steps_per_second": 2.164, "eval_weighted avg": { - "f1-score": 0.9058074210256117, - "precision": 0.9047621091758847, - "recall": 0.9077117572692794, - "support": 32431.0 + "f1-score": 0.9096653222792656, + "precision": 0.9087293921765042, + "recall": 0.9114623419461243, + "support": 36380.0 }, "step": 1215 }, { "epoch": 16.0, "eval_B-Claim": { - "f1-score": 0.6643835616438356, - "precision": 0.7265917602996255, - "recall": 0.61198738170347, - "support": 317.0 + "f1-score": 0.6602254428341385, + "precision": 0.7269503546099291, + "recall": 0.6047197640117994, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9113924050632912, - "precision": 0.8944099378881988, - "recall": 0.9290322580645162, - "support": 155.0 + "f1-score": 0.8903225806451613, + "precision": 0.92, + "recall": 0.8625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8954518606024808, - "precision": 0.871264367816092, - "recall": 0.9210206561360875, - "support": 823.0 + "f1-score": 0.9055441478439425, + "precision": 0.8758689175769613, + "recall": 0.9373007438894793, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6607098535616778, - "precision": 0.7167474421109317, - "recall": 0.6127992633517495, - "support": 4344.0 + "f1-score": 0.6361776387050188, + "precision": 0.7319302132373304, + "recall": 0.5625798212005109, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9103066635923449, - "precision": 0.8879892037786775, - "recall": 0.9337748344370861, - "support": 2114.0 + "f1-score": 0.9097614949594295, + "precision": 0.9073075036782736, + "recall": 0.9122287968441815, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9118409680207432, - "precision": 0.8940046606878045, - "recall": 0.9304034688028221, - "support": 13607.0 + "f1-score": 0.9127908863717504, + "precision": 0.8816300940438871, + "recall": 0.9462351120382209, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9983714828553334, - "precision": 1.0, - "recall": 0.9967482612230151, - "support": 11071.0 + "f1-score": 0.9993256911665542, + "precision": 0.9997751293006522, + "recall": 0.9988766569310268, + "support": 13353.0 }, - "eval_accuracy": 0.90737257562209, - "eval_loss": 0.6626638174057007, + "eval_accuracy": 0.9103353490929081, + "eval_loss": 0.6975058317184448, "eval_macro avg": { - "f1-score": 0.8503509707628153, - "precision": 0.8558581960830471, - "recall": 0.8479665891026781, - "support": 32431.0 - }, - "eval_runtime": 4.9177, - "eval_samples_per_second": 16.268, - "eval_steps_per_second": 2.033, + "f1-score": 0.844878268932285, + "precision": 0.8633517446352906, + "recall": 0.8320629849878884, + "support": 36380.0 + }, + "eval_runtime": 5.0731, + "eval_samples_per_second": 15.967, + "eval_steps_per_second": 2.168, "eval_weighted avg": { - "f1-score": 0.9048051223322606, - "precision": 0.9038418545958269, - "recall": 0.90737257562209, - "support": 32431.0 + "f1-score": 0.9060232340867683, + "precision": 0.9056723119856793, + "recall": 0.9103353490929081, + "support": 36380.0 }, "step": 1296 }, { "epoch": 17.0, "eval_B-Claim": { - "f1-score": 0.6856240126382307, - "precision": 0.6867088607594937, - "recall": 0.6845425867507886, - "support": 317.0 + "f1-score": 0.6964285714285714, + "precision": 0.7027027027027027, + "recall": 0.6902654867256637, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9131832797427654, - "precision": 0.9102564102564102, - "recall": 0.9161290322580645, - "support": 155.0 + "f1-score": 0.903954802259887, + "precision": 0.8247422680412371, + "recall": 1.0, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8919902912621359, - "precision": 0.8909090909090909, - "recall": 0.8930741190765492, - "support": 823.0 + "f1-score": 0.9045822102425876, + "precision": 0.9179431072210066, + "recall": 0.8916046758767269, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6720430107526882, - "precision": 0.682573599240266, - "recall": 0.6618324125230203, - "support": 4344.0 + "f1-score": 0.6845814977973568, + "precision": 0.7092651757188498, + "recall": 0.6615581098339719, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9010318949343339, - "precision": 0.8934883720930232, - "recall": 0.9087038789025544, - "support": 2114.0 + "f1-score": 0.8985378821444396, + "precision": 0.8157683024939663, + "recall": 1.0, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9102217019097096, - "precision": 0.9062431703941138, - "recall": 0.9142353200558536, - "support": 13607.0 + "f1-score": 0.9151271572832584, + "precision": 0.9186330349877949, + "recall": 0.9116479375546733, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9988243805389763, + "f1-score": 0.9988753936122357, "precision": 1.0, - "recall": 0.9976515219943998, - "support": 11071.0 + "recall": 0.9977533138620535, + "support": 13353.0 }, - "eval_accuracy": 0.90576917147174, - "eval_loss": 0.6958470940589905, + "eval_accuracy": 0.9136888400219901, + "eval_loss": 0.692401111125946, "eval_macro avg": { - "f1-score": 0.8532740816826916, - "precision": 0.8528827862360568, - "recall": 0.853738410223033, - "support": 32431.0 - }, - "eval_runtime": 4.9433, - "eval_samples_per_second": 16.183, - "eval_steps_per_second": 2.023, + "f1-score": 0.8574410735383337, + "precision": 0.8412935130236511, + "recall": 0.878975646264727, + "support": 36380.0 + }, + "eval_runtime": 5.0854, + "eval_samples_per_second": 15.928, + "eval_steps_per_second": 2.163, "eval_weighted avg": { - "f1-score": 0.9053221272704657, - "precision": 0.9049421722279843, - "recall": 0.90576917147174, - "support": 32431.0 + "f1-score": 0.9128097974394099, + "precision": 0.9132839716585025, + "recall": 0.9136888400219901, + "support": 36380.0 }, "step": 1377 }, { "epoch": 18.0, "eval_B-Claim": { - "f1-score": 0.688, - "precision": 0.698051948051948, - "recall": 0.6782334384858044, - "support": 317.0 + "f1-score": 0.6927899686520376, + "precision": 0.7391304347826086, + "recall": 0.6519174041297935, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9180327868852459, - "precision": 0.9333333333333333, - "recall": 0.9032258064516129, - "support": 155.0 + "f1-score": 0.9192546583850932, + "precision": 0.9135802469135802, + "recall": 0.925, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8942307692307692, - "precision": 0.8846611177170036, - "recall": 0.9040097205346294, - "support": 823.0 + "f1-score": 0.9089016137428423, + "precision": 0.8908163265306123, + "recall": 0.9277364505844846, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6798171758479673, - "precision": 0.7118387909319899, - "recall": 0.6505524861878453, - "support": 4344.0 + "f1-score": 0.6788553259141495, + "precision": 0.7276046738072055, + "recall": 0.6362281822051937, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9276937618147448, - "precision": 0.9268177525967894, - "recall": 0.9285714285714286, - "support": 2114.0 + "f1-score": 0.9119420989143546, + "precision": 0.8927727916863486, + "recall": 0.9319526627218935, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9138506163886876, - "precision": 0.9018822013883919, - "recall": 0.9261409568604395, - "support": 13607.0 + "f1-score": 0.9172837873578418, + "precision": 0.9016052511860662, + "recall": 0.9335172599421304, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9999096739228616, - "precision": 0.9999096739228616, - "recall": 0.9999096739228616, - "support": 11071.0 + "f1-score": 0.9990254872563717, + "precision": 1.0, + "recall": 0.9980528720137797, + "support": 13353.0 }, - "eval_accuracy": 0.911473590083562, - "eval_loss": 0.6612581014633179, + "eval_accuracy": 0.915915338097856, + "eval_loss": 0.6629185676574707, "eval_macro avg": { - "f1-score": 0.8602192548700395, - "precision": 0.865213545420331, - "recall": 0.8558062158592316, - "support": 32431.0 - }, - "eval_runtime": 4.9167, - "eval_samples_per_second": 16.271, - "eval_steps_per_second": 2.034, + "f1-score": 0.861150420031813, + "precision": 0.866501389272346, + "recall": 0.8577721187996108, + "support": 36380.0 + }, + "eval_runtime": 5.0772, + "eval_samples_per_second": 15.954, + "eval_steps_per_second": 2.167, "eval_weighted avg": { - "f1-score": 0.9100977079524204, - "precision": 0.9092367421655284, - "recall": 0.911473590083562, - "support": 32431.0 + "f1-score": 0.9138987233713269, + "precision": 0.9130176425817779, + "recall": 0.915915338097856, + "support": 36380.0 }, "step": 1458 }, { "epoch": 18.52, - "grad_norm": 0.010771242901682854, + "grad_norm": 34.05133056640625, "learning_rate": 1.2592592592592593e-05, - "loss": 0.0083, + "loss": 0.0105, "step": 1500 }, { "epoch": 19.0, "eval_B-Claim": { - "f1-score": 0.6950819672131147, - "precision": 0.7235494880546075, - "recall": 0.668769716088328, - "support": 317.0 + "f1-score": 0.6967340590979781, + "precision": 0.7368421052631579, + "recall": 0.6607669616519174, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9049180327868852, - "precision": 0.92, - "recall": 0.8903225806451613, - "support": 155.0 + "f1-score": 0.9244712990936558, + "precision": 0.8947368421052632, + "recall": 0.95625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.9021479713603818, - "precision": 0.8862837045720985, - "recall": 0.9185905224787363, - "support": 823.0 + "f1-score": 0.9071840587309911, + "precision": 0.8954451345755694, + "recall": 0.9192348565356004, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6773359596208298, - "precision": 0.7279703625297698, - "recall": 0.6332872928176796, - "support": 4344.0 + "f1-score": 0.6769585253456222, + "precision": 0.7378201908588649, + "recall": 0.6253724989357173, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8959923664122137, - "precision": 0.9037536092396535, - "recall": 0.8883632923368022, - "support": 2114.0 + "f1-score": 0.9125809435707678, + "precision": 0.8593205574912892, + "recall": 0.972879684418146, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9165171200918814, - "precision": 0.8956857243072606, - "recall": 0.9383405600058793, - "support": 13607.0 + "f1-score": 0.916641828117238, + "precision": 0.9025042389461327, + "recall": 0.9312293923692887, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9978274644699919, + "f1-score": 0.9990254872563717, "precision": 1.0, - "recall": 0.9956643482973534, - "support": 11071.0 + "recall": 0.9980528720137797, + "support": 13353.0 }, - "eval_accuracy": 0.9104252104467947, - "eval_loss": 0.6561155319213867, + "eval_accuracy": 0.9158603628367235, + "eval_loss": 0.69818514585495, "eval_macro avg": { - "f1-score": 0.8556886974221856, - "precision": 0.865320412671913, - "recall": 0.8476197589528487, - "support": 32431.0 - }, - "eval_runtime": 4.9567, - "eval_samples_per_second": 16.14, - "eval_steps_per_second": 2.017, + "f1-score": 0.8619423144589463, + "precision": 0.8609527241771824, + "recall": 0.8662551808463499, + "support": 36380.0 + }, + "eval_runtime": 5.0766, + "eval_samples_per_second": 15.956, + "eval_steps_per_second": 2.167, "eval_weighted avg": { - "f1-score": 0.9083143486955053, - "precision": 0.9075517471550982, - "recall": 0.9104252104467947, - "support": 32431.0 + "f1-score": 0.9134424215990907, + "precision": 0.912854812597098, + "recall": 0.9158603628367235, + "support": 36380.0 }, "step": 1539 }, { "epoch": 20.0, "eval_B-Claim": { - "f1-score": 0.6833333333333333, - "precision": 0.7243816254416962, - "recall": 0.6466876971608833, - "support": 317.0 + "f1-score": 0.7078313253012049, + "precision": 0.7230769230769231, + "recall": 0.6932153392330384, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9050632911392404, - "precision": 0.8881987577639752, - "recall": 0.9225806451612903, - "support": 155.0 + "f1-score": 0.9386503067484663, + "precision": 0.9216867469879518, + "recall": 0.95625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8997613365155133, - "precision": 0.8839390386869871, - "recall": 0.9161603888213852, - "support": 823.0 + "f1-score": 0.9058201058201059, + "precision": 0.9020021074815595, + "recall": 0.9096705632306057, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6925646418409007, - "precision": 0.7485958812516715, - "recall": 0.6443370165745856, - "support": 4344.0 + "f1-score": 0.6867509620670699, + "precision": 0.710256993404594, + "recall": 0.6647509578544061, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9094717244589248, - "precision": 0.8950984883188273, - "recall": 0.924314096499527, - "support": 2114.0 + "f1-score": 0.9304635761589404, + "precision": 0.894090909090909, + "recall": 0.9699211045364892, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9203195854027206, - "precision": 0.9017561182029762, - "recall": 0.9396634085397222, - "support": 13607.0 + "f1-score": 0.9132413977774803, + "precision": 0.9085581085581086, + "recall": 0.9179732184913532, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9982808541440463, - "precision": 1.0, - "recall": 0.9965676090687381, - "support": 11071.0 + "f1-score": 0.9989880439263896, + "precision": 0.9999249699879952, + "recall": 0.9980528720137797, + "support": 13353.0 }, - "eval_accuracy": 0.9149887453362523, - "eval_loss": 0.6437746286392212, + "eval_accuracy": 0.9154205607476635, + "eval_loss": 0.6933491826057434, "eval_macro avg": { - "f1-score": 0.8583992524049543, - "precision": 0.8631385585237333, - "recall": 0.8557586945465904, - "support": 32431.0 - }, - "eval_runtime": 4.9174, - "eval_samples_per_second": 16.269, - "eval_steps_per_second": 2.034, + "f1-score": 0.8688208168285224, + "precision": 0.8656566797982916, + "recall": 0.8728334364799532, + "support": 36380.0 + }, + "eval_runtime": 5.07, + "eval_samples_per_second": 15.976, + "eval_steps_per_second": 2.17, "eval_weighted avg": { - "f1-score": 0.9128082705193213, - "precision": 0.9120938553978903, - "recall": 0.9149887453362523, - "support": 32431.0 + "f1-score": 0.9144315421411552, + "precision": 0.9138389454030841, + "recall": 0.9154205607476635, + "support": 36380.0 }, "step": 1620 }, { "epoch": 21.0, "eval_B-Claim": { - "f1-score": 0.6677631578947368, - "precision": 0.697594501718213, - "recall": 0.6403785488958991, - "support": 317.0 + "f1-score": 0.7069219440353463, + "precision": 0.7058823529411765, + "recall": 0.7079646017699115, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.8903654485049834, - "precision": 0.9178082191780822, - "recall": 0.864516129032258, - "support": 155.0 + "f1-score": 0.9153605015673982, + "precision": 0.9182389937106918, + "recall": 0.9125, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8954869358669834, - "precision": 0.875725900116144, - "recall": 0.9161603888213852, - "support": 823.0 + "f1-score": 0.9070631970260223, + "precision": 0.9065817409766455, + "recall": 0.9075451647183846, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6630760023937762, - "precision": 0.6906008476689105, - "recall": 0.6376611418047882, - "support": 4344.0 + "f1-score": 0.6948799473741915, + "precision": 0.7164820257743613, + "recall": 0.6745423584504044, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8958130477117819, - "precision": 0.9227683049147443, - "recall": 0.8703878902554399, - "support": 2114.0 + "f1-score": 0.9177033492822966, + "precision": 0.8912639405204461, + "recall": 0.9457593688362919, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9106195648251723, - "precision": 0.8945129732028924, - "recall": 0.9273168222238554, - "support": 13607.0 + "f1-score": 0.9171740148524787, + "precision": 0.9119270937271337, + "recall": 0.9224816634143059, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9977821029285294, - "precision": 1.0, - "recall": 0.995574022220215, - "support": 11071.0 + "f1-score": 0.9989506820566633, + "precision": 0.9997749606181082, + "recall": 0.9981277615517112, + "support": 13353.0 }, - "eval_accuracy": 0.9047207918349727, - "eval_loss": 0.7204756736755371, + "eval_accuracy": 0.9170973062122045, + "eval_loss": 0.6906760334968567, "eval_macro avg": { - "f1-score": 0.8458437514465661, - "precision": 0.8570015352569981, - "recall": 0.8359992776076915, - "support": 32431.0 - }, - "eval_runtime": 4.9231, - "eval_samples_per_second": 16.25, - "eval_steps_per_second": 2.031, + "f1-score": 0.8654362337420567, + "precision": 0.8643073011812233, + "recall": 0.866988702677287, + "support": 36380.0 + }, + "eval_runtime": 5.0608, + "eval_samples_per_second": 16.005, + "eval_steps_per_second": 2.174, "eval_weighted avg": { - "f1-score": 0.9033970685809433, - "precision": 0.9027615692407923, - "recall": 0.9047207918349727, - "support": 32431.0 + "f1-score": 0.9162839627546457, + "precision": 0.9157494555353419, + "recall": 0.9170973062122045, + "support": 36380.0 }, "step": 1701 }, { "epoch": 22.0, "eval_B-Claim": { - "f1-score": 0.6976, - "precision": 0.7077922077922078, - "recall": 0.6876971608832808, - "support": 317.0 + "f1-score": 0.7186147186147187, + "precision": 0.7033898305084746, + "recall": 0.7345132743362832, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9155844155844156, - "precision": 0.9215686274509803, - "recall": 0.9096774193548387, - "support": 155.0 + "f1-score": 0.9325153374233127, + "precision": 0.9156626506024096, + "recall": 0.95, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8986731001206273, - "precision": 0.8922155688622755, - "recall": 0.905224787363305, - "support": 823.0 + "f1-score": 0.9044038668098818, + "precision": 0.9142236699239956, + "recall": 0.8947927736450585, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.695459320649002, - "precision": 0.7054226852948141, - "recall": 0.6857734806629834, - "support": 4344.0 + "f1-score": 0.697543125980136, + "precision": 0.685432504622971, + "recall": 0.7100893997445722, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9150853889943075, - "precision": 0.917697431018078, - "recall": 0.9124881740775781, - "support": 2114.0 + "f1-score": 0.9283000949667617, + "precision": 0.8951465201465202, + "recall": 0.9640039447731755, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9140016812251014, - "precision": 0.9091173476806748, - "recall": 0.9189387815095171, - "support": 13607.0 + "f1-score": 0.9106359835480471, + "precision": 0.9201126528369281, + "recall": 0.9013525334768858, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.999322095177837, - "precision": 1.0, - "recall": 0.9986451088429229, - "support": 11071.0 + "f1-score": 0.9989131656860174, + "precision": 0.999774943735934, + "recall": 0.9980528720137797, + "support": 13353.0 }, - "eval_accuracy": 0.9118436064259505, - "eval_loss": 0.7220445871353149, + "eval_accuracy": 0.91412864211105, + "eval_loss": 0.7237672209739685, "eval_macro avg": { - "f1-score": 0.8622465716787558, - "precision": 0.8648305525855757, - "recall": 0.8597778446706323, - "support": 32431.0 - }, - "eval_runtime": 4.8992, - "eval_samples_per_second": 16.329, - "eval_steps_per_second": 2.041, + "f1-score": 0.8701323275755536, + "precision": 0.8619632531967476, + "recall": 0.8789721139985364, + "support": 36380.0 + }, + "eval_runtime": 5.0999, + "eval_samples_per_second": 15.883, + "eval_steps_per_second": 2.157, "eval_weighted avg": { - "f1-score": 0.9114287052262304, - "precision": 0.9110799834813146, - "recall": 0.9118436064259505, - "support": 32431.0 + "f1-score": 0.9146496958116109, + "precision": 0.9154631021750581, + "recall": 0.91412864211105, + "support": 36380.0 }, "step": 1782 }, { "epoch": 23.0, "eval_B-Claim": { - "f1-score": 0.6832504145936982, - "precision": 0.7202797202797203, - "recall": 0.6498422712933754, - "support": 317.0 + "f1-score": 0.6973293768545994, + "precision": 0.7014925373134329, + "recall": 0.6932153392330384, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.910828025477707, - "precision": 0.89937106918239, - "recall": 0.9225806451612903, - "support": 155.0 + "f1-score": 0.9090909090909091, + "precision": 0.8823529411764706, + "recall": 0.9375, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8956469886702446, - "precision": 0.8793911007025761, - "recall": 0.9125151883353585, - "support": 823.0 + "f1-score": 0.903672166045769, + "precision": 0.9051172707889126, + "recall": 0.9022316684378321, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.676206645646374, - "precision": 0.7145053818554588, - "recall": 0.641804788213628, - "support": 4344.0 + "f1-score": 0.6736772921459087, + "precision": 0.6939742721733243, + "recall": 0.6545338441890166, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9027875380651206, - "precision": 0.894199535962877, - "recall": 0.9115421002838221, - "support": 2114.0 + "f1-score": 0.9026837806301049, + "precision": 0.8568896765618077, + "recall": 0.9536489151873767, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.913811834576664, - "precision": 0.9007639037624046, - "recall": 0.9272433306386418, - "support": 13607.0 + "f1-score": 0.9126989456718825, + "precision": 0.9108638831177535, + "recall": 0.9145414171320907, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9998644925245043, - "precision": 1.0, - "recall": 0.9997290217685846, - "support": 11071.0 + "f1-score": 0.9989130842172332, + "precision": 0.9998499399759904, + "recall": 0.9979779824758481, + "support": 13353.0 }, - "eval_accuracy": 0.9096235083716198, - "eval_loss": 0.7609456181526184, + "eval_accuracy": 0.9114898295766904, + "eval_loss": 0.7884248495101929, "eval_macro avg": { - "f1-score": 0.8546279913649018, - "precision": 0.8583586731064896, - "recall": 0.8521796208135287, - "support": 32431.0 - }, - "eval_runtime": 4.9475, - "eval_samples_per_second": 16.17, - "eval_steps_per_second": 2.021, + "f1-score": 0.8568665078080582, + "precision": 0.8500772173010988, + "recall": 0.8648070238078861, + "support": 36380.0 + }, + "eval_runtime": 5.1019, + "eval_samples_per_second": 15.876, + "eval_steps_per_second": 2.156, "eval_weighted avg": { - "f1-score": 0.907913975857415, - "precision": 0.9069505627094704, - "recall": 0.9096235083716198, - "support": 32431.0 + "f1-score": 0.9106621435750196, + "precision": 0.9102832766025237, + "recall": 0.9114898295766904, + "support": 36380.0 }, "step": 1863 }, { "epoch": 24.0, "eval_B-Claim": { - "f1-score": 0.6968749999999999, - "precision": 0.6904024767801857, - "recall": 0.7034700315457413, - "support": 317.0 + "f1-score": 0.7227866473149492, + "precision": 0.7114285714285714, + "recall": 0.7345132743362832, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9131832797427654, - "precision": 0.9102564102564102, - "recall": 0.9161290322580645, - "support": 155.0 + "f1-score": 0.9300911854103343, + "precision": 0.9053254437869822, + "recall": 0.95625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8922702373706634, - "precision": 0.8939024390243903, - "recall": 0.8906439854191981, - "support": 823.0 + "f1-score": 0.9065520945220195, + "precision": 0.9163952225841476, + "recall": 0.8969181721572795, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6863417982155113, - "precision": 0.6821282401091405, - "recall": 0.6906077348066298, - "support": 4344.0 + "f1-score": 0.700562572975268, + "precision": 0.6987084480203261, + "recall": 0.70242656449553, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.903548463919981, - "precision": 0.9098321342925659, - "recall": 0.8973509933774835, - "support": 2114.0 + "f1-score": 0.9266207551650438, + "precision": 0.8937242327072835, + "recall": 0.9620315581854043, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9113179092948837, - "precision": 0.9122238586156112, - "recall": 0.910413757624752, - "support": 13607.0 + "f1-score": 0.9129007142615349, + "precision": 0.9185286103542234, + "recall": 0.9073413632999126, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9999096657633243, - "precision": 1.0, - "recall": 0.999819347845723, - "support": 11071.0 + "f1-score": 0.9998876698992775, + "precision": 0.9998502321401828, + "recall": 0.9999251104620684, + "support": 13353.0 }, - "eval_accuracy": 0.908143443002066, - "eval_loss": 0.7767113447189331, + "eval_accuracy": 0.916245189664651, + "eval_loss": 0.7238907814025879, "eval_macro avg": { - "f1-score": 0.857635193472447, - "precision": 0.8569636512969006, - "recall": 0.8583478404110846, - "support": 32431.0 - }, - "eval_runtime": 4.9392, - "eval_samples_per_second": 16.197, - "eval_steps_per_second": 2.025, + "f1-score": 0.8713430913640611, + "precision": 0.8634229658602453, + "recall": 0.8799151489909255, + "support": 36380.0 + }, + "eval_runtime": 5.0787, + "eval_samples_per_second": 15.949, + "eval_steps_per_second": 2.166, "eval_weighted avg": { - "f1-score": 0.9083489289362786, - "precision": 0.9085692481624786, - "recall": 0.908143443002066, - "support": 32431.0 + "f1-score": 0.9163125952217944, + "precision": 0.9165644068758583, + "recall": 0.916245189664651, + "support": 36380.0 }, "step": 1944 }, { "epoch": 24.69, - "grad_norm": 0.007521071936935186, + "grad_norm": 25.948219299316406, "learning_rate": 1.0123456790123458e-05, - "loss": 0.0037, + "loss": 0.0057, "step": 2000 }, { "epoch": 25.0, "eval_B-Claim": { - "f1-score": 0.6971080669710807, - "precision": 0.6735294117647059, - "recall": 0.722397476340694, - "support": 317.0 + "f1-score": 0.7191679049034176, + "precision": 0.7245508982035929, + "recall": 0.7138643067846607, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9196141479099678, - "precision": 0.9166666666666666, - "recall": 0.9225806451612903, - "support": 155.0 + "f1-score": 0.9386503067484663, + "precision": 0.9216867469879518, + "recall": 0.95625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8868388683886839, - "precision": 0.8978829389788294, - "recall": 0.8760631834750912, - "support": 823.0 + "f1-score": 0.9080276448697502, + "precision": 0.9085106382978724, + "recall": 0.9075451647183846, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6842635052007605, - "precision": 0.6654339786817489, - "recall": 0.7041896869244936, - "support": 4344.0 + "f1-score": 0.7102681491170699, + "precision": 0.7278820375335121, + "recall": 0.6934865900383141, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9163140573577808, - "precision": 0.9107476635514019, - "recall": 0.9219489120151372, - "support": 2114.0 + "f1-score": 0.9318670810423142, + "precision": 0.9044083526682135, + "recall": 0.9610453648915187, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9042813682570305, - "precision": 0.9130890836892186, - "recall": 0.8956419489968399, - "support": 13607.0 + "f1-score": 0.9183714534844725, + "precision": 0.9154242160861136, + "recall": 0.9213377296278851, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9989601699896017, + "f1-score": 0.9999625538288709, "precision": 1.0, - "recall": 0.9979225002258152, - "support": 11071.0 + "recall": 0.9999251104620684, + "support": 13353.0 }, - "eval_accuracy": 0.9045666183589776, - "eval_loss": 0.7725499272346497, + "eval_accuracy": 0.9208356239692138, + "eval_loss": 0.6898869872093201, "eval_macro avg": { - "f1-score": 0.8581971691535578, - "precision": 0.8539071061903674, - "recall": 0.8629634790199088, - "support": 32431.0 - }, - "eval_runtime": 4.957, - "eval_samples_per_second": 16.139, - "eval_steps_per_second": 2.017, + "f1-score": 0.8751878705706231, + "precision": 0.8717804128253224, + "recall": 0.8790648952175474, + "support": 36380.0 + }, + "eval_runtime": 5.0592, + "eval_samples_per_second": 16.011, + "eval_steps_per_second": 2.174, "eval_weighted avg": { - "f1-score": 0.9055214160067376, - "precision": 0.906722541443147, - "recall": 0.9045666183589776, - "support": 32431.0 + "f1-score": 0.9201627360934556, + "precision": 0.9197045181816026, + "recall": 0.9208356239692138, + "support": 36380.0 }, "step": 2025 }, { "epoch": 26.0, "eval_B-Claim": { - "f1-score": 0.6904376012965964, - "precision": 0.71, - "recall": 0.6719242902208202, - "support": 317.0 + "f1-score": 0.7195301027900147, + "precision": 0.716374269005848, + "recall": 0.7227138643067846, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9022082018927444, - "precision": 0.8827160493827161, - "recall": 0.9225806451612903, - "support": 155.0 + "f1-score": 0.9226006191950465, + "precision": 0.9141104294478528, + "recall": 0.93125, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8958458759783263, - "precision": 0.8878281622911695, - "recall": 0.9040097205346294, - "support": 823.0 + "f1-score": 0.9099627064464572, + "precision": 0.9123931623931624, + "recall": 0.9075451647183846, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6721173500060118, - "precision": 0.7034986156556758, - "recall": 0.6434162062615101, - "support": 4344.0 + "f1-score": 0.7121407121407122, + "precision": 0.717682663207955, + "recall": 0.7066836951894423, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9087940284581292, - "precision": 0.8964565117349287, - "recall": 0.9214758751182592, - "support": 2114.0 + "f1-score": 0.9204819277108435, + "precision": 0.9000942507068803, + "recall": 0.9418145956607495, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.911746539258075, - "precision": 0.9016240298936476, - "recall": 0.9220989196736974, - "support": 13607.0 + "f1-score": 0.9201467173671636, + "precision": 0.9203015616585891, + "recall": 0.9199919251732723, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9998193315266486, - "precision": 0.9999096576023128, - "recall": 0.9997290217685846, - "support": 11071.0 + "f1-score": 0.9993255395683452, + "precision": 1.0, + "recall": 0.9986519883172321, + "support": 13353.0 }, - "eval_accuracy": 0.9083284511732601, - "eval_loss": 0.7629147171974182, + "eval_accuracy": 0.9204233095107202, + "eval_loss": 0.6385065317153931, "eval_macro avg": { - "f1-score": 0.8544241326309331, - "precision": 0.8545761466514928, - "recall": 0.8550335255341129, - "support": 32431.0 - }, - "eval_runtime": 4.9235, - "eval_samples_per_second": 16.249, - "eval_steps_per_second": 2.031, + "f1-score": 0.8720269036026548, + "precision": 0.8687080480600411, + "recall": 0.8755216047665523, + "support": 36380.0 + }, + "eval_runtime": 5.1139, + "eval_samples_per_second": 15.839, + "eval_steps_per_second": 2.151, "eval_weighted avg": { - "f1-score": 0.9069099155264195, - "precision": 0.9059874383002041, - "recall": 0.9083284511732601, - "support": 32431.0 + "f1-score": 0.9202441017693053, + "precision": 0.9201301899865153, + "recall": 0.9204233095107202, + "support": 36380.0 }, "step": 2106 }, { "epoch": 27.0, "eval_B-Claim": { - "f1-score": 0.701639344262295, - "precision": 0.7303754266211604, - "recall": 0.6750788643533123, - "support": 317.0 + "f1-score": 0.7157894736842104, + "precision": 0.7300613496932515, + "recall": 0.7020648967551623, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9166666666666666, - "precision": 0.910828025477707, - "recall": 0.9225806451612903, - "support": 155.0 + "f1-score": 0.914826498422713, + "precision": 0.9235668789808917, + "recall": 0.90625, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.9017964071856287, - "precision": 0.8890200708382526, - "recall": 0.9149453219927096, - "support": 823.0 + "f1-score": 0.9131121642969985, + "precision": 0.9050104384133612, + "recall": 0.9213602550478215, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6878357407186343, - "precision": 0.7143565583932556, - "recall": 0.6632136279926335, - "support": 4344.0 + "f1-score": 0.7079230080572964, + "precision": 0.746342614440774, + "recall": 0.6732652192422307, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9198392054859305, - "precision": 0.9196217494089834, - "recall": 0.9200567644276254, - "support": 2114.0 + "f1-score": 0.922322960058809, + "precision": 0.9167072576716999, + "recall": 0.928007889546351, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9161731041749935, - "precision": 0.906065832973983, - "recall": 0.9265084147865069, - "support": 13607.0 + "f1-score": 0.9238369972479192, + "precision": 0.9106419139756831, + "recall": 0.9374200928605073, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9997741950051934, - "precision": 0.9997290462427746, - "recall": 0.999819347845723, - "support": 11071.0 + "f1-score": 0.9998876530726886, + "precision": 1.0, + "recall": 0.9997753313862053, + "support": 13353.0 }, - "eval_accuracy": 0.913076994233912, - "eval_loss": 0.7461331486701965, + "eval_accuracy": 0.9229246838922485, + "eval_loss": 0.6971738934516907, "eval_macro avg": { - "f1-score": 0.8633892376427632, - "precision": 0.8671423871365881, - "recall": 0.8603147123656859, - "support": 32431.0 - }, - "eval_runtime": 4.9043, - "eval_samples_per_second": 16.312, - "eval_steps_per_second": 2.039, + "f1-score": 0.8710998221200908, + "precision": 0.876047207596523, + "recall": 0.8668776692626111, + "support": 36380.0 + }, + "eval_runtime": 5.1047, + "eval_samples_per_second": 15.868, + "eval_steps_per_second": 2.155, "eval_weighted avg": { - "f1-score": 0.9119067413298663, - "precision": 0.9111175788791426, - "recall": 0.913076994233912, - "support": 32431.0 + "f1-score": 0.9215282605927277, + "precision": 0.920789602130938, + "recall": 0.9229246838922485, + "support": 36380.0 }, "step": 2187 }, { "epoch": 28.0, "eval_B-Claim": { - "f1-score": 0.69375, - "precision": 0.6873065015479877, - "recall": 0.7003154574132492, - "support": 317.0 + "f1-score": 0.701095461658842, + "precision": 0.7466666666666667, + "recall": 0.6607669616519174, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9096989966555183, - "precision": 0.9444444444444444, - "recall": 0.8774193548387097, - "support": 155.0 + "f1-score": 0.929663608562691, + "precision": 0.9101796407185628, + "recall": 0.95, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.8948004836759371, - "precision": 0.8904933814681107, - "recall": 0.8991494532199271, - "support": 823.0 + "f1-score": 0.9096605744125327, + "precision": 0.8942505133470225, + "recall": 0.9256110520722636, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6768418636676612, - "precision": 0.6748283752860412, - "recall": 0.6788674033149171, - "support": 4344.0 + "f1-score": 0.6865845152290164, + "precision": 0.7564036885245902, + "recall": 0.6285653469561515, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9071392910634049, - "precision": 0.9603594080338267, - "recall": 0.8595080416272469, - "support": 2114.0 + "f1-score": 0.9204761904761904, + "precision": 0.889963167587477, + "recall": 0.953155818540434, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9102119741690686, - "precision": 0.9037820605709318, - "recall": 0.9167340339531124, - "support": 13607.0 + "f1-score": 0.9194842783844231, + "precision": 0.8993115872096764, + "recall": 0.9405827333288473, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9998193315266486, - "precision": 0.9999096576023128, - "recall": 0.9997290217685846, - "support": 11071.0 + "f1-score": 0.9987627938364638, + "precision": 1.0, + "recall": 0.9975286452482588, + "support": 13353.0 }, - "eval_accuracy": 0.9067250470229101, - "eval_loss": 0.7586968541145325, + "eval_accuracy": 0.918938977460143, + "eval_loss": 0.7604945302009583, "eval_macro avg": { - "f1-score": 0.8560374201083196, - "precision": 0.8658748327076651, - "recall": 0.847388966590821, - "support": 32431.0 - }, - "eval_runtime": 5.0076, - "eval_samples_per_second": 15.976, - "eval_steps_per_second": 1.997, + "f1-score": 0.8665324889371655, + "precision": 0.8709678948648565, + "recall": 0.8651729368282675, + "support": 36380.0 + }, + "eval_runtime": 5.0746, + "eval_samples_per_second": 15.962, + "eval_steps_per_second": 2.168, "eval_weighted avg": { - "f1-score": 0.9068326774221362, - "precision": 0.9073589365470631, - "recall": 0.9067250470229101, - "support": 32431.0 + "f1-score": 0.9163178491862402, + "precision": 0.9157871854220297, + "recall": 0.918938977460143, + "support": 36380.0 }, "step": 2268 }, { "epoch": 29.0, "eval_B-Claim": { - "f1-score": 0.6825396825396827, - "precision": 0.6869009584664537, - "recall": 0.6782334384858044, - "support": 317.0 + "f1-score": 0.7001569858712716, + "precision": 0.7483221476510067, + "recall": 0.6578171091445427, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.9055374592833876, - "precision": 0.9144736842105263, - "recall": 0.896774193548387, - "support": 155.0 + "f1-score": 0.9129129129129129, + "precision": 0.8786127167630058, + "recall": 0.95, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.893719806763285, - "precision": 0.8883553421368547, - "recall": 0.8991494532199271, - "support": 823.0 + "f1-score": 0.9115646258503401, + "precision": 0.8979381443298969, + "recall": 0.9256110520722636, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6749443598453789, - "precision": 0.6870975435249225, - "recall": 0.6632136279926335, - "support": 4344.0 + "f1-score": 0.6709571729464077, + "precision": 0.7450623700623701, + "recall": 0.6102596849723286, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.9090909090909092, - "precision": 0.9196515004840271, - "recall": 0.8987701040681173, - "support": 2114.0 + "f1-score": 0.9053708439897699, + "precision": 0.8565772107347118, + "recall": 0.9600591715976331, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9108261693137113, - "precision": 0.9029974720115566, - "recall": 0.9187917983390902, - "support": 13607.0 + "f1-score": 0.9177951303087214, + "precision": 0.8991607488702389, + "recall": 0.9372182221923154, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9980995475113122, + "f1-score": 0.9990630036355459, "precision": 1.0, - "recall": 0.9962063047601842, - "support": 11071.0 + "recall": 0.9981277615517112, + "support": 13353.0 }, - "eval_accuracy": 0.9067250470229101, - "eval_loss": 0.796188473701477, + "eval_accuracy": 0.9157778999450248, + "eval_loss": 0.7660219073295593, "eval_macro avg": { - "f1-score": 0.8535368477639526, - "precision": 0.857068071547763, - "recall": 0.8501627029163064, - "support": 32431.0 - }, - "eval_runtime": 4.9618, - "eval_samples_per_second": 16.123, - "eval_steps_per_second": 2.015, + "f1-score": 0.8596886679307099, + "precision": 0.86081047691589, + "recall": 0.8627275716472563, + "support": 36380.0 + }, + "eval_runtime": 5.0668, + "eval_samples_per_second": 15.986, + "eval_steps_per_second": 2.171, "eval_weighted avg": { - "f1-score": 0.9062194518780211, - "precision": 0.9058490047862192, - "recall": 0.9067250470229101, - "support": 32431.0 + "f1-score": 0.9128447549856632, + "precision": 0.912371867054256, + "recall": 0.9157778999450248, + "support": 36380.0 }, "step": 2349 }, { "epoch": 30.0, "eval_B-Claim": { - "f1-score": 0.6710097719869706, - "precision": 0.6936026936026936, - "recall": 0.6498422712933754, - "support": 317.0 + "f1-score": 0.7267355982274741, + "precision": 0.727810650887574, + "recall": 0.7256637168141593, + "support": 339.0 }, "eval_B-MajorClaim": { - "f1-score": 0.89375, - "precision": 0.8666666666666667, - "recall": 0.9225806451612903, - "support": 155.0 + "f1-score": 0.9268292682926829, + "precision": 0.9047619047619048, + "recall": 0.95, + "support": 160.0 }, "eval_B-Premise": { - "f1-score": 0.893848009650181, - "precision": 0.8874251497005988, - "recall": 0.9003645200486027, - "support": 823.0 + "f1-score": 0.9119999999999999, + "precision": 0.9154175588865097, + "recall": 0.9086078639744952, + "support": 941.0 }, "eval_I-Claim": { - "f1-score": 0.6677052127022169, - "precision": 0.69632591852037, - "recall": 0.6413443830570903, - "support": 4344.0 + "f1-score": 0.7044324324324324, + "precision": 0.7157293497363796, + "recall": 0.6934865900383141, + "support": 4698.0 }, "eval_I-MajorClaim": { - "f1-score": 0.8996555683122847, - "precision": 0.8741633199464525, - "recall": 0.9266792809839167, - "support": 2114.0 + "f1-score": 0.9097726740098431, + "precision": 0.8669048682447521, + "recall": 0.9571005917159763, + "support": 2028.0 }, "eval_I-Premise": { - "f1-score": 0.9106609341980617, - "precision": 0.903027675410073, - "recall": 0.9184243404130227, - "support": 13607.0 + "f1-score": 0.9179300586213867, + "precision": 0.9191687470481075, + "recall": 0.9166947042594711, + "support": 14861.0 }, "eval_O": { - "f1-score": 0.9988248056409328, - "precision": 0.9996381073011852, - "recall": 0.9980128263029536, - "support": 11071.0 + "f1-score": 0.9990630036355459, + "precision": 1.0, + "recall": 0.9981277615517112, + "support": 13353.0 }, - "eval_accuracy": 0.9059541796429342, - "eval_loss": 0.8091421127319336, + "eval_accuracy": 0.9181693238042881, + "eval_loss": 0.7438024878501892, "eval_macro avg": { - "f1-score": 0.8479220432129496, - "precision": 0.8458356473068627, - "recall": 0.8510354667514646, - "support": 32431.0 - }, - "eval_runtime": 5.049, - "eval_samples_per_second": 15.845, - "eval_steps_per_second": 1.981, + "f1-score": 0.8709661478884808, + "precision": 0.8642561542236039, + "recall": 0.8785258897648754, + "support": 36380.0 + }, + "eval_runtime": 5.0797, + "eval_samples_per_second": 15.946, + "eval_steps_per_second": 2.165, "eval_weighted avg": { - "f1-score": 0.9046472304257149, - "precision": 0.9038225277994681, - "recall": 0.9059541796429342, - "support": 32431.0 + "f1-score": 0.9177882762480303, + "precision": 0.9177087395840788, + "recall": 0.9181693238042881, + "support": 36380.0 }, "step": 2430 } @@ -1932,7 +1932,7 @@ "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, - "total_flos": 4313724381540000.0, + "total_flos": 4300327721970000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null