{ "best_metric": null, "best_model_checkpoint": null, "epoch": 43.0, "eval_steps": 500, "global_step": 3483, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.17153996101364524, "precision": 0.25287356321839083, "recall": 0.12979351032448377, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.08383233532934131, "precision": 1.0, "recall": 0.04375, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8469620831429876, "precision": 0.7427884615384616, "recall": 0.9851222104144527, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.5420081967213116, "precision": 0.6803858520900321, "recall": 0.4504044274159217, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8343351767251743, "precision": 0.8141717503519474, "recall": 0.8555226824457594, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9004323458767015, "precision": 0.8590809093131264, "recall": 0.9459659511472983, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988389078242631, "precision": 0.9991008541885209, "recall": 0.9985770987793006, "support": 13353.0 }, "eval_accuracy": 0.8856789444749863, "eval_loss": 0.28347474336624146, "eval_macro avg": { "f1-score": 0.6254212866619178, "precision": 0.76405734152864, "recall": 0.629876554361031, "support": 36380.0 }, "eval_runtime": 5.0448, "eval_samples_per_second": 16.056, "eval_steps_per_second": 2.18, "eval_weighted avg": { "f1-score": 0.8748148382495958, "precision": 0.8768575102351055, "recall": 0.8856789444749863, "support": 36380.0 }, "step": 81 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.5137931034482759, "precision": 0.6182572614107884, "recall": 0.43952802359882004, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.7622641509433963, "precision": 0.9619047619047619, "recall": 0.63125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8853910477127397, "precision": 0.8241758241758241, "recall": 0.9564293304994687, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.5413588470388762, "precision": 0.6279853891542568, "recall": 0.47573435504469985, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.7949260042283298, "precision": 0.856492027334852, "recall": 0.7416173570019724, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9007064868336545, "precision": 0.8614779777627618, "recall": 0.9436780835744566, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988389947942025, "precision": 0.9990260713215463, "recall": 0.9986519883172321, "support": 13353.0 }, "eval_accuracy": 0.8864211105002748, "eval_loss": 0.2809496521949768, "eval_macro avg": { "f1-score": 0.7710398049999251, "precision": 0.8213313304378275, "recall": 0.7409841625766643, "support": 36380.0 }, "eval_runtime": 5.0579, "eval_samples_per_second": 16.014, "eval_steps_per_second": 2.175, "eval_weighted avg": { "f1-score": 0.8798131143596267, "precision": 0.8787439244541853, "recall": 0.8864211105002748, "support": 36380.0 }, "step": 162 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.6401137980085349, "precision": 0.6181318681318682, "recall": 0.6637168141592921, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.7918781725888324, "precision": 0.6666666666666666, "recall": 0.975, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8866442199775534, "precision": 0.93935790725327, "recall": 0.8395324123273114, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6254390633315594, "precision": 0.6255056418990845, "recall": 0.6253724989357173, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.76480605487228, "precision": 0.6205096714768191, "recall": 0.9965483234714004, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9049205625504156, "precision": 0.9449897450922942, "recall": 0.868111163447951, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988758992805756, "precision": 0.9995500562429697, "recall": 0.9982026510896428, "support": 13353.0 }, "eval_accuracy": 0.8894997251236944, "eval_loss": 0.318760484457016, "eval_macro avg": { "f1-score": 0.8018111100871074, "precision": 0.7735302223947104, "recall": 0.852354837633045, "support": 36380.0 }, "eval_runtime": 5.0862, "eval_samples_per_second": 15.925, "eval_steps_per_second": 2.163, "eval_weighted avg": { "f1-score": 0.8920666591013163, "precision": 0.9012548868310761, "recall": 0.8894997251236944, "support": 36380.0 }, "step": 243 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.7034883720930232, "precision": 0.6934097421203438, "recall": 0.7138643067846607, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.8958990536277602, "precision": 0.9044585987261147, "recall": 0.8875, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9051172707889126, "precision": 0.9080213903743316, "recall": 0.9022316684378321, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6932699265998139, "precision": 0.6739698492462312, "recall": 0.7137079608343976, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8904009720534629, "precision": 0.877815045519885, "recall": 0.903353057199211, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9127480293558032, "precision": 0.9218310342461052, "recall": 0.9038422717179194, "support": 14861.0 }, "eval_O": { "f1-score": 0.9981619715668254, "precision": 0.9999248459341651, "recall": 0.9964053021792856, "support": 13353.0 }, "eval_accuracy": 0.9113523914238593, "eval_loss": 0.2517726719379425, "eval_macro avg": { "f1-score": 0.8570122280122289, "precision": 0.8542043580238825, "recall": 0.8601292238790438, "support": 36380.0 }, "eval_runtime": 5.062, "eval_samples_per_second": 16.002, "eval_steps_per_second": 2.173, "eval_weighted avg": { "f1-score": 0.9122886837716081, "precision": 0.9134709768686261, "recall": 0.9113523914238593, "support": 36380.0 }, "step": 324 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.6590538336052201, "precision": 0.7372262773722628, "recall": 0.5958702064896755, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9078947368421054, "precision": 0.9583333333333334, "recall": 0.8625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9066802651708312, "precision": 0.8715686274509804, "recall": 0.944739638682253, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6513589503280225, "precision": 0.7243355914538823, "recall": 0.5917411664538101, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8981975120588981, "precision": 0.9256933542647828, "recall": 0.8722879684418146, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9158848058378942, "precision": 0.8877170824123777, "recall": 0.9458986609245676, "support": 14861.0 }, "eval_O": { "f1-score": 0.9994384336041331, "precision": 0.9992513849378649, "recall": 0.9996255523103422, "support": 13353.0 }, "eval_accuracy": 0.9121220450797142, "eval_loss": 0.2739432156085968, "eval_macro avg": { "f1-score": 0.8483583624924435, "precision": 0.8720179501750691, "recall": 0.8303804561860663, "support": 36380.0 }, "eval_runtime": 5.0932, "eval_samples_per_second": 15.904, "eval_steps_per_second": 2.16, "eval_weighted avg": { "f1-score": 0.9087400479269403, "precision": 0.9081638580455985, "recall": 0.9121220450797142, "support": 36380.0 }, "step": 405 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.6675461741424802, "precision": 0.60381861575179, "recall": 0.7463126843657817, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.887608069164265, "precision": 0.8235294117647058, "recall": 0.9625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8737316798196167, "precision": 0.9303721488595438, "recall": 0.8235919234856536, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6529236868186323, "precision": 0.6109050445103857, "recall": 0.7011494252873564, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8689320388349515, "precision": 0.7863418530351438, "recall": 0.9709072978303748, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.8937734066010992, "precision": 0.9312910284463894, "recall": 0.8591615638247763, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988009592326139, "precision": 0.9994750656167979, "recall": 0.9981277615517112, "support": 13353.0 }, "eval_accuracy": 0.8944749862561847, "eval_loss": 0.37784042954444885, "eval_macro avg": { "f1-score": 0.8347594306590942, "precision": 0.8122475954263937, "recall": 0.8659643794779507, "support": 36380.0 }, "eval_runtime": 5.0734, "eval_samples_per_second": 15.966, "eval_steps_per_second": 2.168, "eval_weighted avg": { "f1-score": 0.8971820344797716, "precision": 0.9033144340485707, "recall": 0.8944749862561847, "support": 36380.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 21.778270721435547, "learning_rate": 1.7530864197530865e-05, "loss": 0.2481, "step": 500 }, { "epoch": 7.0, "eval_B-Claim": { "f1-score": 0.6489859594383774, "precision": 0.6887417218543046, "recall": 0.6135693215339233, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.8852459016393444, "precision": 0.9310344827586207, "recall": 0.84375, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9011898603207449, "precision": 0.8780241935483871, "recall": 0.9256110520722636, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.626564114338193, "precision": 0.6800398704211313, "recall": 0.5808854831843338, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8813559322033898, "precision": 0.9049350649350649, "recall": 0.8589743589743589, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9073595340161655, "precision": 0.8831698305516626, "recall": 0.9329116479375547, "support": 14861.0 }, "eval_O": { "f1-score": 0.9981994148098132, "precision": 1.0, "recall": 0.9964053021792856, "support": 13353.0 }, "eval_accuracy": 0.9030786146234194, "eval_loss": 0.4109443426132202, "eval_macro avg": { "f1-score": 0.8355572452522898, "precision": 0.8522778805813102, "recall": 0.8217295951259599, "support": 36380.0 }, "eval_runtime": 5.0454, "eval_samples_per_second": 16.054, "eval_steps_per_second": 2.18, "eval_weighted avg": { "f1-score": 0.9003265559019435, "precision": 0.8992988510674564, "recall": 0.9030786146234194, "support": 36380.0 }, "step": 567 }, { "epoch": 8.0, "eval_B-Claim": { "f1-score": 0.7029972752043596, "precision": 0.6531645569620254, "recall": 0.7610619469026548, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9134328358208955, "precision": 0.8742857142857143, "recall": 0.95625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8914728682170544, "precision": 0.930635838150289, "recall": 0.8554729011689692, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6790487421383647, "precision": 0.6307046367287331, "recall": 0.7354193273733504, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8939393939393939, "precision": 0.836340206185567, "recall": 0.9600591715976331, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.8993180625983564, "precision": 0.9362166885102665, "recall": 0.8652176838705337, "support": 14861.0 }, "eval_O": { "f1-score": 0.9979819119515658, "precision": 0.9960462513987318, "recall": 0.9999251104620684, "support": 13353.0 }, "eval_accuracy": 0.902363936228697, "eval_loss": 0.4763801097869873, "eval_macro avg": { "f1-score": 0.8540272985528559, "precision": 0.8367705560316182, "recall": 0.8762008773393156, "support": 36380.0 }, "eval_runtime": 5.0791, "eval_samples_per_second": 15.948, "eval_steps_per_second": 2.166, "eval_weighted avg": { "f1-score": 0.9048169208096876, "precision": 0.9101018951943243, "recall": 0.902363936228697, "support": 36380.0 }, "step": 648 }, { "epoch": 9.0, "eval_B-Claim": { "f1-score": 0.6884779516358464, "precision": 0.6648351648351648, "recall": 0.7138643067846607, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.8860759493670887, "precision": 0.8974358974358975, "recall": 0.875, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8984416980118215, "precision": 0.908695652173913, "recall": 0.8884165781083954, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6685829873309911, "precision": 0.6687965921192758, "recall": 0.6683695189442316, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8819011041766683, "precision": 0.8592142188961647, "recall": 0.9058185404339251, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9072658065820057, "precision": 0.9083367057871308, "recall": 0.9061974295134917, "support": 14861.0 }, "eval_O": { "f1-score": 0.9971465044679734, "precision": 0.9998494089300505, "recall": 0.9944581741930653, "support": 13353.0 }, "eval_accuracy": 0.9054700384826828, "eval_loss": 0.43875375390052795, "eval_macro avg": { "f1-score": 0.8468417145103421, "precision": 0.843880520025371, "recall": 0.8503035068539672, "support": 36380.0 }, "eval_runtime": 5.0863, "eval_samples_per_second": 15.925, "eval_steps_per_second": 2.163, "eval_weighted avg": { "f1-score": 0.9056589487317133, "precision": 0.9059462683069767, "recall": 0.9054700384826828, "support": 36380.0 }, "step": 729 }, { "epoch": 10.0, "eval_B-Claim": { "f1-score": 0.6910569105691057, "precision": 0.6390977443609023, "recall": 0.7522123893805309, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9046153846153846, "precision": 0.8909090909090909, "recall": 0.91875, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8888888888888888, "precision": 0.9213226909920182, "recall": 0.8586609989373007, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6645316253002403, "precision": 0.6271250472232717, "recall": 0.7066836951894423, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.886980737990253, "precision": 0.8377904427882508, "recall": 0.9423076923076923, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.8983343700324833, "precision": 0.9233501456276195, "recall": 0.8746383150528229, "support": 14861.0 }, "eval_O": { "f1-score": 0.9975225225225225, "precision": 1.0, "recall": 0.9950572904965176, "support": 13353.0 }, "eval_accuracy": 0.8995601979109401, "eval_loss": 0.5381875038146973, "eval_macro avg": { "f1-score": 0.8474186342741253, "precision": 0.8342278802715933, "recall": 0.8640443401949011, "support": 36380.0 }, "eval_runtime": 5.099, "eval_samples_per_second": 15.885, "eval_steps_per_second": 2.157, "eval_weighted avg": { "f1-score": 0.9017669881051834, "precision": 0.9056169116577473, "recall": 0.8995601979109401, "support": 36380.0 }, "step": 810 }, { "epoch": 11.0, "eval_B-Claim": { "f1-score": 0.7103825136612023, "precision": 0.6615776081424937, "recall": 0.7669616519174042, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9032258064516129, "precision": 0.850828729281768, "recall": 0.9625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8955223880597015, "precision": 0.9331797235023042, "recall": 0.8607863974495218, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.685932169375878, "precision": 0.6488230827638573, "recall": 0.7275436355896125, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8921899617375647, "precision": 0.820703933747412, "recall": 0.9773175542406312, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9036754507628294, "precision": 0.932183990271121, "recall": 0.8768588924029338, "support": 14861.0 }, "eval_O": { "f1-score": 0.9971084156370874, "precision": 1.0, "recall": 0.9942335055792706, "support": 13353.0 }, "eval_accuracy": 0.9051951621770203, "eval_loss": 0.5869538187980652, "eval_macro avg": { "f1-score": 0.8554338150979823, "precision": 0.8353281525298509, "recall": 0.880885948168482, "support": 36380.0 }, "eval_runtime": 5.0923, "eval_samples_per_second": 15.906, "eval_steps_per_second": 2.16, "eval_weighted avg": { "f1-score": 0.9071963301332511, "precision": 0.9114149044954519, "recall": 0.9051951621770203, "support": 36380.0 }, "step": 891 }, { "epoch": 12.0, "eval_B-Claim": { "f1-score": 0.6758409785932722, "precision": 0.7015873015873015, "recall": 0.6519174041297935, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.916923076923077, "precision": 0.9030303030303031, "recall": 0.93125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8989473684210527, "precision": 0.8905109489051095, "recall": 0.9075451647183846, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6533197602081212, "precision": 0.6970794110547912, "recall": 0.6147296722009365, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9200480192076831, "precision": 0.8965839962564343, "recall": 0.9447731755424064, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9082653940478558, "precision": 0.8948605759811924, "recall": 0.922077922077922, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988389947942025, "precision": 0.9990260713215463, "recall": 0.9986519883172321, "support": 13353.0 }, "eval_accuracy": 0.9089059923034635, "eval_loss": 0.5580677390098572, "eval_macro avg": { "f1-score": 0.8531690845993235, "precision": 0.854668372590954, "recall": 0.852992189569525, "support": 36380.0 }, "eval_runtime": 5.0574, "eval_samples_per_second": 16.016, "eval_steps_per_second": 2.175, "eval_weighted avg": { "f1-score": 0.9068749310769137, "precision": 0.9057713940131727, "recall": 0.9089059923034635, "support": 36380.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 0.45772114396095276, "learning_rate": 1.506172839506173e-05, "loss": 0.0314, "step": 1000 }, { "epoch": 13.0, "eval_B-Claim": { "f1-score": 0.6760563380281691, "precision": 0.72, "recall": 0.6371681415929203, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9171974522292993, "precision": 0.935064935064935, "recall": 0.9, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.905503634475597, "precision": 0.8852791878172589, "recall": 0.926673751328374, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6522588803310726, "precision": 0.7090727318170458, "recall": 0.6038739889314602, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9228121927236971, "precision": 0.9200980392156862, "recall": 0.9255424063116371, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9102441910145594, "precision": 0.8896312475909033, "recall": 0.9318350043738645, "support": 14861.0 }, "eval_O": { "f1-score": 0.9992880428673137, "precision": 1.0, "recall": 0.9985770987793006, "support": 13353.0 }, "eval_accuracy": 0.9106102253985706, "eval_loss": 0.5264647603034973, "eval_macro avg": { "f1-score": 0.8547658188099584, "precision": 0.8655923059294041, "recall": 0.8462386273310795, "support": 36380.0 }, "eval_runtime": 5.0562, "eval_samples_per_second": 16.02, "eval_steps_per_second": 2.176, "eval_weighted avg": { "f1-score": 0.9080379636257123, "precision": 0.9070294280758862, "recall": 0.9106102253985706, "support": 36380.0 }, "step": 1053 }, { "epoch": 14.0, "eval_B-Claim": { "f1-score": 0.6980609418282548, "precision": 0.6579634464751958, "recall": 0.7433628318584071, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9226006191950465, "precision": 0.9141104294478528, "recall": 0.93125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8921568627450981, "precision": 0.9150837988826815, "recall": 0.8703506907545164, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6821321785751109, "precision": 0.6614677064587082, "recall": 0.7041294167730949, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9124910093502757, "precision": 0.8880074661689221, "recall": 0.9383629191321499, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9052323204796293, "precision": 0.9166609175577786, "recall": 0.894085189421977, "support": 14861.0 }, "eval_O": { "f1-score": 0.9980114808839531, "precision": 1.0, "recall": 0.9960308544896278, "support": 13353.0 }, "eval_accuracy": 0.9075865860362837, "eval_loss": 0.6585939526557922, "eval_macro avg": { "f1-score": 0.8586693447224812, "precision": 0.850470537855877, "recall": 0.8682245574899676, "support": 36380.0 }, "eval_runtime": 5.0606, "eval_samples_per_second": 16.006, "eval_steps_per_second": 2.174, "eval_weighted avg": { "f1-score": 0.9086880683615599, "precision": 0.9102351141829325, "recall": 0.9075865860362837, "support": 36380.0 }, "step": 1134 }, { "epoch": 15.0, "eval_B-Claim": { "f1-score": 0.6920731707317073, "precision": 0.7160883280757098, "recall": 0.6696165191740413, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.90282131661442, "precision": 0.9056603773584906, "recall": 0.9, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9076600209863588, "precision": 0.8963730569948186, "recall": 0.9192348565356004, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6644877718922575, "precision": 0.7061077844311378, "recall": 0.6275010642826735, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9016511127063891, "precision": 0.8758716875871687, "recall": 0.9289940828402367, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9134082844161443, "precision": 0.9005362280931206, "recall": 0.9266536572236054, "support": 14861.0 }, "eval_O": { "f1-score": 0.9987253505286046, "precision": 0.9999249305607687, "recall": 0.9975286452482588, "support": 13353.0 }, "eval_accuracy": 0.9114623419461243, "eval_loss": 0.6387954950332642, "eval_macro avg": { "f1-score": 0.854403861125126, "precision": 0.8572231990144593, "recall": 0.8527898321863451, "support": 36380.0 }, "eval_runtime": 5.0835, "eval_samples_per_second": 15.934, "eval_steps_per_second": 2.164, "eval_weighted avg": { "f1-score": 0.9096653222792656, "precision": 0.9087293921765042, "recall": 0.9114623419461243, "support": 36380.0 }, "step": 1215 }, { "epoch": 16.0, "eval_B-Claim": { "f1-score": 0.6602254428341385, "precision": 0.7269503546099291, "recall": 0.6047197640117994, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.8903225806451613, "precision": 0.92, "recall": 0.8625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9055441478439425, "precision": 0.8758689175769613, "recall": 0.9373007438894793, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6361776387050188, "precision": 0.7319302132373304, "recall": 0.5625798212005109, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9097614949594295, "precision": 0.9073075036782736, "recall": 0.9122287968441815, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9127908863717504, "precision": 0.8816300940438871, "recall": 0.9462351120382209, "support": 14861.0 }, "eval_O": { "f1-score": 0.9993256911665542, "precision": 0.9997751293006522, "recall": 0.9988766569310268, "support": 13353.0 }, "eval_accuracy": 0.9103353490929081, "eval_loss": 0.6975058317184448, "eval_macro avg": { "f1-score": 0.844878268932285, "precision": 0.8633517446352906, "recall": 0.8320629849878884, "support": 36380.0 }, "eval_runtime": 5.0731, "eval_samples_per_second": 15.967, "eval_steps_per_second": 2.168, "eval_weighted avg": { "f1-score": 0.9060232340867683, "precision": 0.9056723119856793, "recall": 0.9103353490929081, "support": 36380.0 }, "step": 1296 }, { "epoch": 17.0, "eval_B-Claim": { "f1-score": 0.6964285714285714, "precision": 0.7027027027027027, "recall": 0.6902654867256637, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.903954802259887, "precision": 0.8247422680412371, "recall": 1.0, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9045822102425876, "precision": 0.9179431072210066, "recall": 0.8916046758767269, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6845814977973568, "precision": 0.7092651757188498, "recall": 0.6615581098339719, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.8985378821444396, "precision": 0.8157683024939663, "recall": 1.0, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9151271572832584, "precision": 0.9186330349877949, "recall": 0.9116479375546733, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988753936122357, "precision": 1.0, "recall": 0.9977533138620535, "support": 13353.0 }, "eval_accuracy": 0.9136888400219901, "eval_loss": 0.692401111125946, "eval_macro avg": { "f1-score": 0.8574410735383337, "precision": 0.8412935130236511, "recall": 0.878975646264727, "support": 36380.0 }, "eval_runtime": 5.0854, "eval_samples_per_second": 15.928, "eval_steps_per_second": 2.163, "eval_weighted avg": { "f1-score": 0.9128097974394099, "precision": 0.9132839716585025, "recall": 0.9136888400219901, "support": 36380.0 }, "step": 1377 }, { "epoch": 18.0, "eval_B-Claim": { "f1-score": 0.6927899686520376, "precision": 0.7391304347826086, "recall": 0.6519174041297935, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9192546583850932, "precision": 0.9135802469135802, "recall": 0.925, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9089016137428423, "precision": 0.8908163265306123, "recall": 0.9277364505844846, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6788553259141495, "precision": 0.7276046738072055, "recall": 0.6362281822051937, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9119420989143546, "precision": 0.8927727916863486, "recall": 0.9319526627218935, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9172837873578418, "precision": 0.9016052511860662, "recall": 0.9335172599421304, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990254872563717, "precision": 1.0, "recall": 0.9980528720137797, "support": 13353.0 }, "eval_accuracy": 0.915915338097856, "eval_loss": 0.6629185676574707, "eval_macro avg": { "f1-score": 0.861150420031813, "precision": 0.866501389272346, "recall": 0.8577721187996108, "support": 36380.0 }, "eval_runtime": 5.0772, "eval_samples_per_second": 15.954, "eval_steps_per_second": 2.167, "eval_weighted avg": { "f1-score": 0.9138987233713269, "precision": 0.9130176425817779, "recall": 0.915915338097856, "support": 36380.0 }, "step": 1458 }, { "epoch": 18.52, "grad_norm": 34.05133056640625, "learning_rate": 1.2592592592592593e-05, "loss": 0.0105, "step": 1500 }, { "epoch": 19.0, "eval_B-Claim": { "f1-score": 0.6967340590979781, "precision": 0.7368421052631579, "recall": 0.6607669616519174, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9244712990936558, "precision": 0.8947368421052632, "recall": 0.95625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9071840587309911, "precision": 0.8954451345755694, "recall": 0.9192348565356004, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6769585253456222, "precision": 0.7378201908588649, "recall": 0.6253724989357173, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9125809435707678, "precision": 0.8593205574912892, "recall": 0.972879684418146, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.916641828117238, "precision": 0.9025042389461327, "recall": 0.9312293923692887, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990254872563717, "precision": 1.0, "recall": 0.9980528720137797, "support": 13353.0 }, "eval_accuracy": 0.9158603628367235, "eval_loss": 0.69818514585495, "eval_macro avg": { "f1-score": 0.8619423144589463, "precision": 0.8609527241771824, "recall": 0.8662551808463499, "support": 36380.0 }, "eval_runtime": 5.0766, "eval_samples_per_second": 15.956, "eval_steps_per_second": 2.167, "eval_weighted avg": { "f1-score": 0.9134424215990907, "precision": 0.912854812597098, "recall": 0.9158603628367235, "support": 36380.0 }, "step": 1539 }, { "epoch": 20.0, "eval_B-Claim": { "f1-score": 0.7078313253012049, "precision": 0.7230769230769231, "recall": 0.6932153392330384, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9386503067484663, "precision": 0.9216867469879518, "recall": 0.95625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9058201058201059, "precision": 0.9020021074815595, "recall": 0.9096705632306057, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6867509620670699, "precision": 0.710256993404594, "recall": 0.6647509578544061, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9304635761589404, "precision": 0.894090909090909, "recall": 0.9699211045364892, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9132413977774803, "precision": 0.9085581085581086, "recall": 0.9179732184913532, "support": 14861.0 }, "eval_O": { "f1-score": 0.9989880439263896, "precision": 0.9999249699879952, "recall": 0.9980528720137797, "support": 13353.0 }, "eval_accuracy": 0.9154205607476635, "eval_loss": 0.6933491826057434, "eval_macro avg": { "f1-score": 0.8688208168285224, "precision": 0.8656566797982916, "recall": 0.8728334364799532, "support": 36380.0 }, "eval_runtime": 5.07, "eval_samples_per_second": 15.976, "eval_steps_per_second": 2.17, "eval_weighted avg": { "f1-score": 0.9144315421411552, "precision": 0.9138389454030841, "recall": 0.9154205607476635, "support": 36380.0 }, "step": 1620 }, { "epoch": 21.0, "eval_B-Claim": { "f1-score": 0.7069219440353463, "precision": 0.7058823529411765, "recall": 0.7079646017699115, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9153605015673982, "precision": 0.9182389937106918, "recall": 0.9125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9070631970260223, "precision": 0.9065817409766455, "recall": 0.9075451647183846, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6948799473741915, "precision": 0.7164820257743613, "recall": 0.6745423584504044, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9177033492822966, "precision": 0.8912639405204461, "recall": 0.9457593688362919, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9171740148524787, "precision": 0.9119270937271337, "recall": 0.9224816634143059, "support": 14861.0 }, "eval_O": { "f1-score": 0.9989506820566633, "precision": 0.9997749606181082, "recall": 0.9981277615517112, "support": 13353.0 }, "eval_accuracy": 0.9170973062122045, "eval_loss": 0.6906760334968567, "eval_macro avg": { "f1-score": 0.8654362337420567, "precision": 0.8643073011812233, "recall": 0.866988702677287, "support": 36380.0 }, "eval_runtime": 5.0608, "eval_samples_per_second": 16.005, "eval_steps_per_second": 2.174, "eval_weighted avg": { "f1-score": 0.9162839627546457, "precision": 0.9157494555353419, "recall": 0.9170973062122045, "support": 36380.0 }, "step": 1701 }, { "epoch": 22.0, "eval_B-Claim": { "f1-score": 0.7186147186147187, "precision": 0.7033898305084746, "recall": 0.7345132743362832, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9325153374233127, "precision": 0.9156626506024096, "recall": 0.95, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9044038668098818, "precision": 0.9142236699239956, "recall": 0.8947927736450585, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.697543125980136, "precision": 0.685432504622971, "recall": 0.7100893997445722, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9283000949667617, "precision": 0.8951465201465202, "recall": 0.9640039447731755, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9106359835480471, "precision": 0.9201126528369281, "recall": 0.9013525334768858, "support": 14861.0 }, "eval_O": { "f1-score": 0.9989131656860174, "precision": 0.999774943735934, "recall": 0.9980528720137797, "support": 13353.0 }, "eval_accuracy": 0.91412864211105, "eval_loss": 0.7237672209739685, "eval_macro avg": { "f1-score": 0.8701323275755536, "precision": 0.8619632531967476, "recall": 0.8789721139985364, "support": 36380.0 }, "eval_runtime": 5.0999, "eval_samples_per_second": 15.883, "eval_steps_per_second": 2.157, "eval_weighted avg": { "f1-score": 0.9146496958116109, "precision": 0.9154631021750581, "recall": 0.91412864211105, "support": 36380.0 }, "step": 1782 }, { "epoch": 23.0, "eval_B-Claim": { "f1-score": 0.6973293768545994, "precision": 0.7014925373134329, "recall": 0.6932153392330384, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9090909090909091, "precision": 0.8823529411764706, "recall": 0.9375, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.903672166045769, "precision": 0.9051172707889126, "recall": 0.9022316684378321, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6736772921459087, "precision": 0.6939742721733243, "recall": 0.6545338441890166, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9026837806301049, "precision": 0.8568896765618077, "recall": 0.9536489151873767, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9126989456718825, "precision": 0.9108638831177535, "recall": 0.9145414171320907, "support": 14861.0 }, "eval_O": { "f1-score": 0.9989130842172332, "precision": 0.9998499399759904, "recall": 0.9979779824758481, "support": 13353.0 }, "eval_accuracy": 0.9114898295766904, "eval_loss": 0.7884248495101929, "eval_macro avg": { "f1-score": 0.8568665078080582, "precision": 0.8500772173010988, "recall": 0.8648070238078861, "support": 36380.0 }, "eval_runtime": 5.1019, "eval_samples_per_second": 15.876, "eval_steps_per_second": 2.156, "eval_weighted avg": { "f1-score": 0.9106621435750196, "precision": 0.9102832766025237, "recall": 0.9114898295766904, "support": 36380.0 }, "step": 1863 }, { "epoch": 24.0, "eval_B-Claim": { "f1-score": 0.7227866473149492, "precision": 0.7114285714285714, "recall": 0.7345132743362832, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9300911854103343, "precision": 0.9053254437869822, "recall": 0.95625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9065520945220195, "precision": 0.9163952225841476, "recall": 0.8969181721572795, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.700562572975268, "precision": 0.6987084480203261, "recall": 0.70242656449553, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9266207551650438, "precision": 0.8937242327072835, "recall": 0.9620315581854043, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9129007142615349, "precision": 0.9185286103542234, "recall": 0.9073413632999126, "support": 14861.0 }, "eval_O": { "f1-score": 0.9998876698992775, "precision": 0.9998502321401828, "recall": 0.9999251104620684, "support": 13353.0 }, "eval_accuracy": 0.916245189664651, "eval_loss": 0.7238907814025879, "eval_macro avg": { "f1-score": 0.8713430913640611, "precision": 0.8634229658602453, "recall": 0.8799151489909255, "support": 36380.0 }, "eval_runtime": 5.0787, "eval_samples_per_second": 15.949, "eval_steps_per_second": 2.166, "eval_weighted avg": { "f1-score": 0.9163125952217944, "precision": 0.9165644068758583, "recall": 0.916245189664651, "support": 36380.0 }, "step": 1944 }, { "epoch": 24.69, "grad_norm": 25.948219299316406, "learning_rate": 1.0123456790123458e-05, "loss": 0.0057, "step": 2000 }, { "epoch": 25.0, "eval_B-Claim": { "f1-score": 0.7191679049034176, "precision": 0.7245508982035929, "recall": 0.7138643067846607, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9386503067484663, "precision": 0.9216867469879518, "recall": 0.95625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9080276448697502, "precision": 0.9085106382978724, "recall": 0.9075451647183846, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.7102681491170699, "precision": 0.7278820375335121, "recall": 0.6934865900383141, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9318670810423142, "precision": 0.9044083526682135, "recall": 0.9610453648915187, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9183714534844725, "precision": 0.9154242160861136, "recall": 0.9213377296278851, "support": 14861.0 }, "eval_O": { "f1-score": 0.9999625538288709, "precision": 1.0, "recall": 0.9999251104620684, "support": 13353.0 }, "eval_accuracy": 0.9208356239692138, "eval_loss": 0.6898869872093201, "eval_macro avg": { "f1-score": 0.8751878705706231, "precision": 0.8717804128253224, "recall": 0.8790648952175474, "support": 36380.0 }, "eval_runtime": 5.0592, "eval_samples_per_second": 16.011, "eval_steps_per_second": 2.174, "eval_weighted avg": { "f1-score": 0.9201627360934556, "precision": 0.9197045181816026, "recall": 0.9208356239692138, "support": 36380.0 }, "step": 2025 }, { "epoch": 26.0, "eval_B-Claim": { "f1-score": 0.7195301027900147, "precision": 0.716374269005848, "recall": 0.7227138643067846, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9226006191950465, "precision": 0.9141104294478528, "recall": 0.93125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9099627064464572, "precision": 0.9123931623931624, "recall": 0.9075451647183846, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.7121407121407122, "precision": 0.717682663207955, "recall": 0.7066836951894423, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9204819277108435, "precision": 0.9000942507068803, "recall": 0.9418145956607495, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9201467173671636, "precision": 0.9203015616585891, "recall": 0.9199919251732723, "support": 14861.0 }, "eval_O": { "f1-score": 0.9993255395683452, "precision": 1.0, "recall": 0.9986519883172321, "support": 13353.0 }, "eval_accuracy": 0.9204233095107202, "eval_loss": 0.6385065317153931, "eval_macro avg": { "f1-score": 0.8720269036026548, "precision": 0.8687080480600411, "recall": 0.8755216047665523, "support": 36380.0 }, "eval_runtime": 5.1139, "eval_samples_per_second": 15.839, "eval_steps_per_second": 2.151, "eval_weighted avg": { "f1-score": 0.9202441017693053, "precision": 0.9201301899865153, "recall": 0.9204233095107202, "support": 36380.0 }, "step": 2106 }, { "epoch": 27.0, "eval_B-Claim": { "f1-score": 0.7157894736842104, "precision": 0.7300613496932515, "recall": 0.7020648967551623, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.914826498422713, "precision": 0.9235668789808917, "recall": 0.90625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9131121642969985, "precision": 0.9050104384133612, "recall": 0.9213602550478215, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.7079230080572964, "precision": 0.746342614440774, "recall": 0.6732652192422307, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.922322960058809, "precision": 0.9167072576716999, "recall": 0.928007889546351, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9238369972479192, "precision": 0.9106419139756831, "recall": 0.9374200928605073, "support": 14861.0 }, "eval_O": { "f1-score": 0.9998876530726886, "precision": 1.0, "recall": 0.9997753313862053, "support": 13353.0 }, "eval_accuracy": 0.9229246838922485, "eval_loss": 0.6971738934516907, "eval_macro avg": { "f1-score": 0.8710998221200908, "precision": 0.876047207596523, "recall": 0.8668776692626111, "support": 36380.0 }, "eval_runtime": 5.1047, "eval_samples_per_second": 15.868, "eval_steps_per_second": 2.155, "eval_weighted avg": { "f1-score": 0.9215282605927277, "precision": 0.920789602130938, "recall": 0.9229246838922485, "support": 36380.0 }, "step": 2187 }, { "epoch": 28.0, "eval_B-Claim": { "f1-score": 0.701095461658842, "precision": 0.7466666666666667, "recall": 0.6607669616519174, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.929663608562691, "precision": 0.9101796407185628, "recall": 0.95, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9096605744125327, "precision": 0.8942505133470225, "recall": 0.9256110520722636, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6865845152290164, "precision": 0.7564036885245902, "recall": 0.6285653469561515, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9204761904761904, "precision": 0.889963167587477, "recall": 0.953155818540434, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9194842783844231, "precision": 0.8993115872096764, "recall": 0.9405827333288473, "support": 14861.0 }, "eval_O": { "f1-score": 0.9987627938364638, "precision": 1.0, "recall": 0.9975286452482588, "support": 13353.0 }, "eval_accuracy": 0.918938977460143, "eval_loss": 0.7604945302009583, "eval_macro avg": { "f1-score": 0.8665324889371655, "precision": 0.8709678948648565, "recall": 0.8651729368282675, "support": 36380.0 }, "eval_runtime": 5.0746, "eval_samples_per_second": 15.962, "eval_steps_per_second": 2.168, "eval_weighted avg": { "f1-score": 0.9163178491862402, "precision": 0.9157871854220297, "recall": 0.918938977460143, "support": 36380.0 }, "step": 2268 }, { "epoch": 29.0, "eval_B-Claim": { "f1-score": 0.7001569858712716, "precision": 0.7483221476510067, "recall": 0.6578171091445427, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9129129129129129, "precision": 0.8786127167630058, "recall": 0.95, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9115646258503401, "precision": 0.8979381443298969, "recall": 0.9256110520722636, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6709571729464077, "precision": 0.7450623700623701, "recall": 0.6102596849723286, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9053708439897699, "precision": 0.8565772107347118, "recall": 0.9600591715976331, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9177951303087214, "precision": 0.8991607488702389, "recall": 0.9372182221923154, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990630036355459, "precision": 1.0, "recall": 0.9981277615517112, "support": 13353.0 }, "eval_accuracy": 0.9157778999450248, "eval_loss": 0.7660219073295593, "eval_macro avg": { "f1-score": 0.8596886679307099, "precision": 0.86081047691589, "recall": 0.8627275716472563, "support": 36380.0 }, "eval_runtime": 5.0668, "eval_samples_per_second": 15.986, "eval_steps_per_second": 2.171, "eval_weighted avg": { "f1-score": 0.9128447549856632, "precision": 0.912371867054256, "recall": 0.9157778999450248, "support": 36380.0 }, "step": 2349 }, { "epoch": 30.0, "eval_B-Claim": { "f1-score": 0.7267355982274741, "precision": 0.727810650887574, "recall": 0.7256637168141593, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9268292682926829, "precision": 0.9047619047619048, "recall": 0.95, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9119999999999999, "precision": 0.9154175588865097, "recall": 0.9086078639744952, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.7044324324324324, "precision": 0.7157293497363796, "recall": 0.6934865900383141, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9097726740098431, "precision": 0.8669048682447521, "recall": 0.9571005917159763, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9179300586213867, "precision": 0.9191687470481075, "recall": 0.9166947042594711, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990630036355459, "precision": 1.0, "recall": 0.9981277615517112, "support": 13353.0 }, "eval_accuracy": 0.9181693238042881, "eval_loss": 0.7438024878501892, "eval_macro avg": { "f1-score": 0.8709661478884808, "precision": 0.8642561542236039, "recall": 0.8785258897648754, "support": 36380.0 }, "eval_runtime": 5.0797, "eval_samples_per_second": 15.946, "eval_steps_per_second": 2.165, "eval_weighted avg": { "f1-score": 0.9177882762480303, "precision": 0.9177087395840788, "recall": 0.9181693238042881, "support": 36380.0 }, "step": 2430 }, { "epoch": 30.86, "grad_norm": 0.005249341484159231, "learning_rate": 7.654320987654322e-06, "loss": 0.0029, "step": 2500 }, { "epoch": 31.0, "eval_B-Claim": { "f1-score": 0.7037037037037037, "precision": 0.7378640776699029, "recall": 0.672566371681416, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9102564102564101, "precision": 0.9342105263157895, "recall": 0.8875, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9135416666666667, "precision": 0.8958120531154239, "recall": 0.9319872476089267, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6881058273463337, "precision": 0.7410955539179562, "recall": 0.6421881651766709, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9200885173346448, "precision": 0.9176066699362433, "recall": 0.9225838264299803, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9197734158872348, "precision": 0.9007288911823518, "recall": 0.9396406702106184, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990254872563717, "precision": 1.0, "recall": 0.9980528720137797, "support": 13353.0 }, "eval_accuracy": 0.9188015393073117, "eval_loss": 0.7646034359931946, "eval_macro avg": { "f1-score": 0.864927861207338, "precision": 0.8753311103053811, "recall": 0.8563598790173417, "support": 36380.0 }, "eval_runtime": 5.0935, "eval_samples_per_second": 15.903, "eval_steps_per_second": 2.16, "eval_weighted avg": { "f1-score": 0.9167465652664151, "precision": 0.9159943386289288, "recall": 0.9188015393073117, "support": 36380.0 }, "step": 2511 }, { "epoch": 32.0, "eval_B-Claim": { "f1-score": 0.7127819548872181, "precision": 0.7269938650306749, "recall": 0.6991150442477876, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.916923076923077, "precision": 0.9030303030303031, "recall": 0.93125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9111111111111111, "precision": 0.9072708113804004, "recall": 0.9149840595111584, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6915867240048518, "precision": 0.7174559597346145, "recall": 0.6675180928054492, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9048414023372287, "precision": 0.8762124711316397, "recall": 0.935404339250493, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9185957176737817, "precision": 0.9120456354470682, "recall": 0.9252405625462621, "support": 14861.0 }, "eval_O": { "f1-score": 0.9989880439263896, "precision": 0.9999249699879952, "recall": 0.9980528720137797, "support": 13353.0 }, "eval_accuracy": 0.9169048927982408, "eval_loss": 0.7867851853370667, "eval_macro avg": { "f1-score": 0.8649754329805226, "precision": 0.8632762879632423, "recall": 0.8673664243392756, "support": 36380.0 }, "eval_runtime": 5.0786, "eval_samples_per_second": 15.949, "eval_steps_per_second": 2.166, "eval_weighted avg": { "f1-score": 0.9159023483997761, "precision": 0.9152873255952713, "recall": 0.9169048927982408, "support": 36380.0 }, "step": 2592 }, { "epoch": 33.0, "eval_B-Claim": { "f1-score": 0.7119565217391305, "precision": 0.6599496221662469, "recall": 0.7728613569321534, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9153605015673982, "precision": 0.9182389937106918, "recall": 0.9125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.8976464148877943, "precision": 0.9255079006772009, "recall": 0.871413390010627, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.694591622513246, "precision": 0.6548539114043356, "recall": 0.7394636015325671, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.913297613248904, "precision": 0.9023099133782483, "recall": 0.9245562130177515, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.906384295389693, "precision": 0.9266132433572333, "recall": 0.8870197160352601, "support": 14861.0 }, "eval_O": { "f1-score": 0.9991005172026085, "precision": 1.0, "recall": 0.9982026510896428, "support": 13353.0 }, "eval_accuracy": 0.9095107201759208, "eval_loss": 0.8171125650405884, "eval_macro avg": { "f1-score": 0.8626196409355392, "precision": 0.8553533692419938, "recall": 0.8722881326597145, "support": 36380.0 }, "eval_runtime": 5.0657, "eval_samples_per_second": 15.99, "eval_steps_per_second": 2.171, "eval_weighted avg": { "f1-score": 0.9114519362220568, "precision": 0.9145500738066468, "recall": 0.9095107201759208, "support": 36380.0 }, "step": 2673 }, { "epoch": 34.0, "eval_B-Claim": { "f1-score": 0.7111801242236023, "precision": 0.7508196721311475, "recall": 0.6755162241887905, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9263803680981595, "precision": 0.9096385542168675, "recall": 0.94375, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9131799163179917, "precision": 0.8990731204943357, "recall": 0.9277364505844846, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6864754098360655, "precision": 0.737885462555066, "recall": 0.6417624521072797, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9156047126713153, "precision": 0.8934772407320507, "recall": 0.9388560157790927, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9185062789160608, "precision": 0.9024611987791415, "recall": 0.9351322252876657, "support": 14861.0 }, "eval_O": { "f1-score": 0.99883786316776, "precision": 1.0, "recall": 0.9976784243241219, "support": 13353.0 }, "eval_accuracy": 0.9178394722374932, "eval_loss": 0.8038027882575989, "eval_macro avg": { "f1-score": 0.8671663818901364, "precision": 0.8704793212726585, "recall": 0.8657759703244906, "support": 36380.0 }, "eval_runtime": 5.0399, "eval_samples_per_second": 16.072, "eval_steps_per_second": 2.183, "eval_weighted avg": { "f1-score": 0.915830809588254, "precision": 0.9150393476156526, "recall": 0.9178394722374932, "support": 36380.0 }, "step": 2754 }, { "epoch": 35.0, "eval_B-Claim": { "f1-score": 0.7168141592920354, "precision": 0.7168141592920354, "recall": 0.7168141592920354, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9174311926605505, "precision": 0.8982035928143712, "recall": 0.9375, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.908315565031983, "precision": 0.9112299465240642, "recall": 0.9054197662061636, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6992383025027203, "precision": 0.7152715939447908, "recall": 0.6839080459770115, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9111323459149546, "precision": 0.8836886005560705, "recall": 0.9403353057199211, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9168091741273514, "precision": 0.9137147440181794, "recall": 0.9199246349505417, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990254872563717, "precision": 1.0, "recall": 0.9980528720137797, "support": 13353.0 }, "eval_accuracy": 0.9170698185816383, "eval_loss": 0.7819703221321106, "eval_macro avg": { "f1-score": 0.8669666038265668, "precision": 0.8627032338785016, "recall": 0.8717078263084933, "support": 36380.0 }, "eval_runtime": 5.0537, "eval_samples_per_second": 16.028, "eval_steps_per_second": 2.177, "eval_weighted avg": { "f1-score": 0.9164925879119019, "precision": 0.9161176799247925, "recall": 0.9170698185816383, "support": 36380.0 }, "step": 2835 }, { "epoch": 36.0, "eval_B-Claim": { "f1-score": 0.725111441307578, "precision": 0.7305389221556886, "recall": 0.7197640117994101, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.924924924924925, "precision": 0.8901734104046243, "recall": 0.9625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9109333333333333, "precision": 0.9143468950749465, "recall": 0.9075451647183846, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6972032591940103, "precision": 0.7221715328467153, "recall": 0.6739037888463176, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9117442668519805, "precision": 0.8597640891218873, "recall": 0.9704142011834319, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9174804523641732, "precision": 0.9151158120230285, "recall": 0.919857344727811, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990630036355459, "precision": 1.0, "recall": 0.9981277615517112, "support": 13353.0 }, "eval_accuracy": 0.9176470588235294, "eval_loss": 0.7942823171615601, "eval_macro avg": { "f1-score": 0.8694943830873637, "precision": 0.8617300945181272, "recall": 0.8788731818324381, "support": 36380.0 }, "eval_runtime": 5.0747, "eval_samples_per_second": 15.961, "eval_steps_per_second": 2.168, "eval_weighted avg": { "f1-score": 0.9167298682863283, "precision": 0.9164205713322794, "recall": 0.9176470588235294, "support": 36380.0 }, "step": 2916 }, { "epoch": 37.0, "eval_B-Claim": { "f1-score": 0.7201166180758017, "precision": 0.7118155619596542, "recall": 0.7286135693215339, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9333333333333333, "precision": 0.9058823529411765, "recall": 0.9625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9055793991416309, "precision": 0.914409534127844, "recall": 0.8969181721572795, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.697548860814167, "precision": 0.7078676309445541, "recall": 0.687526607066837, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9257089289899227, "precision": 0.8820902188476999, "recall": 0.9738658777120316, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9140158542755945, "precision": 0.9163961038961039, "recall": 0.9116479375546733, "support": 14861.0 }, "eval_O": { "f1-score": 0.9999625566330924, "precision": 0.9999251160700914, "recall": 1.0, "support": 13353.0 }, "eval_accuracy": 0.9167399670148433, "eval_loss": 0.8100723624229431, "eval_macro avg": { "f1-score": 0.8708950787519346, "precision": 0.8626266455410178, "recall": 0.8801531662589079, "support": 36380.0 }, "eval_runtime": 5.1035, "eval_samples_per_second": 15.871, "eval_steps_per_second": 2.155, "eval_weighted avg": { "f1-score": 0.9163198176652315, "precision": 0.9162097221680536, "recall": 0.9167399670148433, "support": 36380.0 }, "step": 2997 }, { "epoch": 37.04, "grad_norm": 0.014361537992954254, "learning_rate": 5.185185185185185e-06, "loss": 0.0012, "step": 3000 }, { "epoch": 38.0, "eval_B-Claim": { "f1-score": 0.7090103397341211, "precision": 0.7100591715976331, "recall": 0.7079646017699115, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9226006191950465, "precision": 0.9141104294478528, "recall": 0.93125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9069643806485912, "precision": 0.9074468085106383, "recall": 0.9064824654622742, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6971639682712161, "precision": 0.7120976692563818, "recall": 0.6828437633035335, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9183969097054563, "precision": 0.8997161778618732, "recall": 0.9378698224852071, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9169401835354009, "precision": 0.9127825565113022, "recall": 0.9211358589596932, "support": 14861.0 }, "eval_O": { "f1-score": 0.9988753936122357, "precision": 1.0, "recall": 0.9977533138620535, "support": 13353.0 }, "eval_accuracy": 0.9170973062122045, "eval_loss": 0.7743718028068542, "eval_macro avg": { "f1-score": 0.8671359706717239, "precision": 0.8651732590265259, "recall": 0.8693285465489532, "support": 36380.0 }, "eval_runtime": 5.0608, "eval_samples_per_second": 16.005, "eval_steps_per_second": 2.174, "eval_weighted avg": { "f1-score": 0.9165431704418915, "precision": 0.9161296318272839, "recall": 0.9170973062122045, "support": 36380.0 }, "step": 3078 }, { "epoch": 39.0, "eval_B-Claim": { "f1-score": 0.7212643678160918, "precision": 0.7030812324929971, "recall": 0.7404129793510325, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.924924924924925, "precision": 0.8901734104046243, "recall": 0.9625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9049676025917925, "precision": 0.9198682766190999, "recall": 0.8905419766206164, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.7112540192926046, "precision": 0.716321243523316, "recall": 0.7062579821200511, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9149382428338383, "precision": 0.8674326115775519, "recall": 0.967948717948718, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9182190067277459, "precision": 0.9226797119173801, "recall": 0.9138012246820537, "support": 14861.0 }, "eval_O": { "f1-score": 0.9989879680647701, "precision": 1.0, "recall": 0.9979779824758481, "support": 13353.0 }, "eval_accuracy": 0.9189114898295767, "eval_loss": 0.786410927772522, "eval_macro avg": { "f1-score": 0.8706508760359669, "precision": 0.8599366409335669, "recall": 0.8827772661711885, "support": 36380.0 }, "eval_runtime": 5.0796, "eval_samples_per_second": 15.946, "eval_steps_per_second": 2.166, "eval_weighted avg": { "f1-score": 0.9188063842322733, "precision": 0.9190693256991324, "recall": 0.9189114898295767, "support": 36380.0 }, "step": 3159 }, { "epoch": 40.0, "eval_B-Claim": { "f1-score": 0.7025411061285501, "precision": 0.7121212121212122, "recall": 0.6932153392330384, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9153605015673982, "precision": 0.9182389937106918, "recall": 0.9125, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.907554146856841, "precision": 0.9023109243697479, "recall": 0.9128586609989373, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6935466199189219, "precision": 0.7146082637164145, "recall": 0.6736909323116219, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9152542372881355, "precision": 0.8991436726926736, "recall": 0.9319526627218935, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9170396099124974, "precision": 0.9103507724951926, "recall": 0.9238274678689187, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990254872563717, "precision": 1.0, "recall": 0.9980528720137797, "support": 13353.0 }, "eval_accuracy": 0.9167399670148433, "eval_loss": 0.7890852093696594, "eval_macro avg": { "f1-score": 0.8643316727041023, "precision": 0.8652534055865618, "recall": 0.8637282764497414, "support": 36380.0 }, "eval_runtime": 5.0635, "eval_samples_per_second": 15.997, "eval_steps_per_second": 2.172, "eval_weighted avg": { "f1-score": 0.9159196854113261, "precision": 0.9153330877115516, "recall": 0.9167399670148433, "support": 36380.0 }, "step": 3240 }, { "epoch": 41.0, "eval_B-Claim": { "f1-score": 0.7040498442367601, "precision": 0.7458745874587459, "recall": 0.6666666666666666, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9221556886227545, "precision": 0.8850574712643678, "recall": 0.9625, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.910236220472441, "precision": 0.8993775933609959, "recall": 0.9213602550478215, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.6905677448541053, "precision": 0.7367277992277992, "recall": 0.6498510004257131, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9161620866325104, "precision": 0.8680494263018534, "recall": 0.9699211045364892, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9182410856838743, "precision": 0.9079129119252779, "recall": 0.9288069443509858, "support": 14861.0 }, "eval_O": { "f1-score": 0.9989879680647701, "precision": 1.0, "recall": 0.9979779824758481, "support": 13353.0 }, "eval_accuracy": 0.9179769103903244, "eval_loss": 0.8002001047134399, "eval_macro avg": { "f1-score": 0.8657715197953165, "precision": 0.863285684219863, "recall": 0.8710119933576463, "support": 36380.0 }, "eval_runtime": 5.0812, "eval_samples_per_second": 15.941, "eval_steps_per_second": 2.165, "eval_weighted avg": { "f1-score": 0.9161759532647805, "precision": 0.9155530571010836, "recall": 0.9179769103903244, "support": 36380.0 }, "step": 3321 }, { "epoch": 42.0, "eval_B-Claim": { "f1-score": 0.7157001414427157, "precision": 0.6875, "recall": 0.7463126843657817, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9325153374233127, "precision": 0.9156626506024096, "recall": 0.95, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9015151515151515, "precision": 0.918412348401323, "recall": 0.8852284803400637, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.7007630439265827, "precision": 0.6796, "recall": 0.7232865048957003, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9199237368922784, "precision": 0.8902214022140221, "recall": 0.9516765285996055, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9111384111384111, "precision": 0.9243232015509243, "recall": 0.8983244734540071, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990630036355459, "precision": 1.0, "recall": 0.9981277615517112, "support": 13353.0 }, "eval_accuracy": 0.9137987905442551, "eval_loss": 0.8046557307243347, "eval_macro avg": { "f1-score": 0.8686598322819996, "precision": 0.8593885146812399, "recall": 0.8789937761724099, "support": 36380.0 }, "eval_runtime": 5.1004, "eval_samples_per_second": 15.881, "eval_steps_per_second": 2.157, "eval_weighted avg": { "f1-score": 0.9147569239629072, "precision": 0.9161982255602077, "recall": 0.9137987905442551, "support": 36380.0 }, "step": 3402 }, { "epoch": 43.0, "eval_B-Claim": { "f1-score": 0.7159763313609467, "precision": 0.7181008902077152, "recall": 0.7138643067846607, "support": 339.0 }, "eval_B-MajorClaim": { "f1-score": 0.9292307692307692, "precision": 0.9151515151515152, "recall": 0.94375, "support": 160.0 }, "eval_B-Premise": { "f1-score": 0.9085106382978723, "precision": 0.9094781682641108, "recall": 0.9075451647183846, "support": 941.0 }, "eval_I-Claim": { "f1-score": 0.7010042112082928, "precision": 0.7113740959894806, "recall": 0.6909323116219668, "support": 4698.0 }, "eval_I-MajorClaim": { "f1-score": 0.9217098943323727, "precision": 0.8984082397003745, "recall": 0.9462524654832347, "support": 2028.0 }, "eval_I-Premise": { "f1-score": 0.9170417142473298, "precision": 0.9154429021658955, "recall": 0.9186461207186596, "support": 14861.0 }, "eval_O": { "f1-score": 0.9990254872563717, "precision": 1.0, "recall": 0.9980528720137797, "support": 13353.0 }, "eval_accuracy": 0.9178394722374932, "eval_loss": 0.7769815921783447, "eval_macro avg": { "f1-score": 0.8703570065619937, "precision": 0.8668508302112988, "recall": 0.8741490344772409, "support": 36380.0 }, "eval_runtime": 5.0739, "eval_samples_per_second": 15.964, "eval_steps_per_second": 2.168, "eval_weighted avg": { "f1-score": 0.91745440095583, "precision": 0.9171820720917949, "recall": 0.9178394722374932, "support": 36380.0 }, "step": 3483 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 6163803068157000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }