{ "best_metric": null, "best_model_checkpoint": null, "epoch": 13.0, "eval_steps": 500, "global_step": 1053, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.18764302059496568, "precision": 0.3416666666666667, "recall": 0.12933753943217666, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.19540229885057472, "precision": 0.8947368421052632, "recall": 0.10967741935483871, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8476339053562143, "precision": 0.740909090909091, "recall": 0.9902794653705954, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.4315998169894769, "precision": 0.6394035246272028, "recall": 0.3257366482504604, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.7779799818016379, "precision": 0.7493426818580193, "recall": 0.8088930936613056, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.8948683715038855, "precision": 0.8376057421174058, "recall": 0.9605350187403542, "support": 13607.0 }, "eval_O": { "f1-score": 0.9980147987727848, "precision": 0.9970251509961237, "recall": 0.9990064131514769, "support": 11071.0 }, "eval_accuracy": 0.8673183065585397, "eval_loss": 0.3193369209766388, "eval_macro avg": { "f1-score": 0.6190203134099342, "precision": 0.7429556713256817, "recall": 0.6176379425658869, "support": 32431.0 }, "eval_runtime": 4.9083, "eval_samples_per_second": 16.299, "eval_steps_per_second": 2.037, "eval_weighted avg": { "f1-score": 0.8489527906249091, "precision": 0.8526967051825685, "recall": 0.8673183065585397, "support": 32431.0 }, "step": 81 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.583941605839416, "precision": 0.6926406926406926, "recall": 0.5047318611987381, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8117647058823529, "precision": 0.745945945945946, "recall": 0.8903225806451613, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8943661971830986, "precision": 0.8649262202043133, "recall": 0.9258809234507898, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6197740770932805, "precision": 0.6271506009898656, "recall": 0.6125690607734806, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8298737427776589, "precision": 0.7577178585384916, "recall": 0.9172185430463576, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9013697613125952, "precision": 0.9106660666066607, "recall": 0.8922613360770192, "support": 13607.0 }, "eval_O": { "f1-score": 0.9961487925331883, "precision": 0.9993636363636363, "recall": 0.9929545659831993, "support": 11071.0 }, "eval_accuracy": 0.887854213561099, "eval_loss": 0.26407390832901, "eval_macro avg": { "f1-score": 0.8053198403745129, "precision": 0.7997730030413723, "recall": 0.8194198387392494, "support": 32431.0 }, "eval_runtime": 4.9329, "eval_samples_per_second": 16.218, "eval_steps_per_second": 2.027, "eval_weighted avg": { "f1-score": 0.8876368199002541, "precision": 0.8889201304482091, "recall": 0.887854213561099, "support": 32431.0 }, "step": 162 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.5895522388059701, "precision": 0.7214611872146118, "recall": 0.49842271293375395, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8668941979522183, "precision": 0.9202898550724637, "recall": 0.8193548387096774, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8929178470254956, "precision": 0.8365180467091295, "recall": 0.9574726609963548, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6548140728684112, "precision": 0.7178149876475433, "recall": 0.6019797421731123, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8815622622368754, "precision": 0.9502460360852925, "recall": 0.8221381267738883, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9169943670953343, "precision": 0.8852257181942544, "recall": 0.9511280958330272, "support": 13607.0 }, "eval_O": { "f1-score": 0.9984170774727511, "precision": 0.9998188405797102, "recall": 0.9970192394544305, "support": 11071.0 }, "eval_accuracy": 0.9067250470229101, "eval_loss": 0.25323203206062317, "eval_macro avg": { "f1-score": 0.8287360090652937, "precision": 0.8616249530718579, "recall": 0.8067879166963207, "support": 32431.0 }, "eval_runtime": 4.9025, "eval_samples_per_second": 16.318, "eval_steps_per_second": 2.04, "eval_weighted avg": { "f1-score": 0.9033110548907601, "precision": 0.9034896128542845, "recall": 0.9067250470229101, "support": 32431.0 }, "step": 243 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.6806136680613668, "precision": 0.61, "recall": 0.7697160883280757, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8861538461538462, "precision": 0.8470588235294118, "recall": 0.9290322580645162, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8687258687258688, "precision": 0.9233926128590971, "recall": 0.8201701093560145, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.656155854589275, "precision": 0.5819380121125757, "recall": 0.7520718232044199, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8818897637795275, "precision": 0.8408408408408409, "recall": 0.9271523178807947, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.8829758349171871, "precision": 0.9349433218334154, "recall": 0.836481222899978, "support": 13607.0 }, "eval_O": { "f1-score": 0.9971017118014671, "precision": 0.9998183634547271, "recall": 0.9943997832174148, "support": 11071.0 }, "eval_accuracy": 0.8843698930036077, "eval_loss": 0.2791365385055542, "eval_macro avg": { "f1-score": 0.8362309354326484, "precision": 0.8197131392328668, "recall": 0.8612890861358877, "support": 32431.0 }, "eval_runtime": 4.9234, "eval_samples_per_second": 16.249, "eval_steps_per_second": 2.031, "eval_weighted avg": { "f1-score": 0.889158243622726, "precision": 0.8997827813567029, "recall": 0.8843698930036077, "support": 32431.0 }, "step": 324 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.6078799249530958, "precision": 0.75, "recall": 0.5110410094637224, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8971962616822431, "precision": 0.8674698795180723, "recall": 0.9290322580645162, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.893739230327398, "precision": 0.8474945533769063, "recall": 0.945321992709599, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6119813135476778, "precision": 0.7590320381731425, "recall": 0.5126611418047882, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8889393595275948, "precision": 0.854958497160332, "recall": 0.9257332071901608, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9137743463765573, "precision": 0.874462654486835, "recall": 0.956786947894466, "support": 13607.0 }, "eval_O": { "f1-score": 0.9976913675252366, "precision": 1.0, "recall": 0.9953933700659381, "support": 11071.0 }, "eval_accuracy": 0.9036724121982054, "eval_loss": 0.3500836193561554, "eval_macro avg": { "f1-score": 0.8301716862771149, "precision": 0.8504882318164697, "recall": 0.8251385610275985, "support": 32431.0 }, "eval_runtime": 4.9239, "eval_samples_per_second": 16.247, "eval_steps_per_second": 2.031, "eval_weighted avg": { "f1-score": 0.8968007164885599, "precision": 0.8986502613295283, "recall": 0.9036724121982054, "support": 32431.0 }, "step": 405 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.672077922077922, "precision": 0.6923076923076923, "recall": 0.6529968454258676, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8910256410256411, "precision": 0.8853503184713376, "recall": 0.896774193548387, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8907563025210083, "precision": 0.8801897983392646, "recall": 0.9015795868772782, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6666666666666667, "precision": 0.6998228296633764, "recall": 0.6365101289134438, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8853370396108408, "precision": 0.867453472537449, "recall": 0.9039735099337748, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9098497495826378, "precision": 0.8987595898759589, "recall": 0.9212170206511354, "support": 13607.0 }, "eval_O": { "f1-score": 0.998190209030857, "precision": 1.0, "recall": 0.9963869569144612, "support": 11071.0 }, "eval_accuracy": 0.9043816101877833, "eval_loss": 0.30672115087509155, "eval_macro avg": { "f1-score": 0.8448433615022248, "precision": 0.8462691001707254, "recall": 0.8442054631806213, "support": 32431.0 }, "eval_runtime": 4.9018, "eval_samples_per_second": 16.321, "eval_steps_per_second": 2.04, "eval_weighted avg": { "f1-score": 0.9029367568413356, "precision": 0.9020793621628027, "recall": 0.9043816101877833, "support": 32431.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 0.43724608421325684, "learning_rate": 1.7530864197530865e-05, "loss": 0.2434, "step": 500 }, { "epoch": 7.0, "eval_B-Claim": { "f1-score": 0.6763285024154589, "precision": 0.6907894736842105, "recall": 0.6624605678233438, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8952380952380952, "precision": 0.88125, "recall": 0.9096774193548387, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8941034897713597, "precision": 0.8855780691299165, "recall": 0.9027946537059538, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6960393719240684, "precision": 0.7088305489260143, "recall": 0.6837016574585635, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.9140037593984962, "precision": 0.9080298786181139, "recall": 0.9200567644276254, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9164355568530141, "precision": 0.9101253895774444, "recall": 0.9228338355258323, "support": 13607.0 }, "eval_O": { "f1-score": 0.9967376529225193, "precision": 1.0, "recall": 0.9934965224460302, "support": 11071.0 }, "eval_accuracy": 0.9116277635595572, "eval_loss": 0.34577852487564087, "eval_macro avg": { "f1-score": 0.855555204074716, "precision": 0.8549433371336713, "recall": 0.8564316315345982, "support": 32431.0 }, "eval_runtime": 4.8989, "eval_samples_per_second": 16.33, "eval_steps_per_second": 2.041, "eval_weighted avg": { "f1-score": 0.9111536914902465, "precision": 0.910801887328957, "recall": 0.9116277635595572, "support": 32431.0 }, "step": 567 }, { "epoch": 8.0, "eval_B-Claim": { "f1-score": 0.701923076923077, "precision": 0.7133550488599348, "recall": 0.6908517350157729, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.9260450160771705, "precision": 0.9230769230769231, "recall": 0.9290322580645162, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8985507246376812, "precision": 0.8931572629051621, "recall": 0.9040097205346294, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6915371329879102, "precision": 0.6917760884588804, "recall": 0.6912983425414365, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.9147286821705426, "precision": 0.9085394307046197, "recall": 0.9210028382213813, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.912640635340834, "precision": 0.9131778382753293, "recall": 0.9121040640846623, "support": 13607.0 }, "eval_O": { "f1-score": 0.9993222177036736, "precision": 0.9998191681735985, "recall": 0.9988257609971999, "support": 11071.0 }, "eval_accuracy": 0.9104252104467947, "eval_loss": 0.3896510601043701, "eval_macro avg": { "f1-score": 0.8635353551201269, "precision": 0.863271680064921, "recall": 0.8638749599227997, "support": 32431.0 }, "eval_runtime": 4.9786, "eval_samples_per_second": 16.069, "eval_steps_per_second": 2.009, "eval_weighted avg": { "f1-score": 0.9103982293589928, "precision": 0.9103825180220693, "recall": 0.9104252104467947, "support": 32431.0 }, "step": 648 }, { "epoch": 9.0, "eval_B-Claim": { "f1-score": 0.6862745098039215, "precision": 0.6171284634760705, "recall": 0.7728706624605678, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.9119496855345912, "precision": 0.8895705521472392, "recall": 0.9354838709677419, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8690095846645367, "precision": 0.9164420485175202, "recall": 0.8262454434993924, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6841291120965247, "precision": 0.619652531290865, "recall": 0.7635819521178637, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.9025385312783318, "precision": 0.8664055700609226, "recall": 0.9418164616840113, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.8949834831374357, "precision": 0.9374748531423513, "recall": 0.8561769677371941, "support": 13607.0 }, "eval_O": { "f1-score": 0.9989151071331707, "precision": 0.9998190209030857, "recall": 0.9980128263029536, "support": 11071.0 }, "eval_accuracy": 0.8965804323024267, "eval_loss": 0.4196818470954895, "eval_macro avg": { "f1-score": 0.8496857162355017, "precision": 0.8352132913625792, "recall": 0.8705983121099606, "support": 32431.0 }, "eval_runtime": 4.9285, "eval_samples_per_second": 16.232, "eval_steps_per_second": 2.029, "eval_weighted avg": { "f1-score": 0.9000940098435676, "precision": 0.9076597589527613, "recall": 0.8965804323024267, "support": 32431.0 }, "step": 729 }, { "epoch": 10.0, "eval_B-Claim": { "f1-score": 0.6905537459283387, "precision": 0.7138047138047138, "recall": 0.668769716088328, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.9142857142857143, "precision": 0.9, "recall": 0.9290322580645162, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.894484412470024, "precision": 0.8828402366863906, "recall": 0.9064398541919806, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.681682755333095, "precision": 0.7066963182604399, "recall": 0.6583793738489871, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.9083702494754021, "precision": 0.895632183908046, "recall": 0.9214758751182592, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9129089784183134, "precision": 0.904253785147801, "recall": 0.9217314617476299, "support": 13607.0 }, "eval_O": { "f1-score": 0.9983716301791207, "precision": 0.999909395669113, "recall": 0.9968385873001535, "support": 11071.0 }, "eval_accuracy": 0.9092534920292313, "eval_loss": 0.5311424136161804, "eval_macro avg": { "f1-score": 0.8572367837271441, "precision": 0.8575909476395005, "recall": 0.857523875194265, "support": 32431.0 }, "eval_runtime": 4.8937, "eval_samples_per_second": 16.348, "eval_steps_per_second": 2.043, "eval_weighted avg": { "f1-score": 0.9081813659228948, "precision": 0.9074585127295745, "recall": 0.9092534920292313, "support": 32431.0 }, "step": 810 }, { "epoch": 11.0, "eval_B-Claim": { "f1-score": 0.680921052631579, "precision": 0.711340206185567, "recall": 0.6529968454258676, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.9009584664536742, "precision": 0.8924050632911392, "recall": 0.9096774193548387, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8942020322773461, "precision": 0.88, "recall": 0.9088699878493317, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6838570400575678, "precision": 0.713820731096645, "recall": 0.6563075506445673, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.902502979737783, "precision": 0.9096588178760211, "recall": 0.8954588457899716, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9134434303925829, "precision": 0.9004641199571581, "recall": 0.9268023811273609, "support": 13607.0 }, "eval_O": { "f1-score": 0.9988699543461556, "precision": 0.999728555917481, "recall": 0.9980128263029536, "support": 11071.0 }, "eval_accuracy": 0.9096235083716198, "eval_loss": 0.49213308095932007, "eval_macro avg": { "f1-score": 0.8535364222709555, "precision": 0.8582024991891446, "recall": 0.8497322652135558, "support": 32431.0 }, "eval_runtime": 4.9244, "eval_samples_per_second": 16.246, "eval_steps_per_second": 2.031, "eval_weighted avg": { "f1-score": 0.9083194817194259, "precision": 0.9075428987655072, "recall": 0.9096235083716198, "support": 32431.0 }, "step": 891 }, { "epoch": 12.0, "eval_B-Claim": { "f1-score": 0.6577181208053691, "precision": 0.7025089605734767, "recall": 0.6182965299684543, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8978328173374612, "precision": 0.8630952380952381, "recall": 0.9354838709677419, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8902147971360382, "precision": 0.8745603751465416, "recall": 0.9064398541919806, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6518804243008679, "precision": 0.6842105263157895, "recall": 0.6224677716390423, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8919829328542555, "precision": 0.8490808037622916, "recall": 0.9394512771996215, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9075452422650322, "precision": 0.9011665821317296, "recall": 0.9140148453002132, "support": 13607.0 }, "eval_O": { "f1-score": 0.9985526910900046, "precision": 1.0, "recall": 0.9971095655315689, "support": 11071.0 }, "eval_accuracy": 0.9020073386574574, "eval_loss": 0.5465030670166016, "eval_macro avg": { "f1-score": 0.8422467179698613, "precision": 0.8392317837178667, "recall": 0.8476091021140889, "support": 32431.0 }, "eval_runtime": 4.8988, "eval_samples_per_second": 16.331, "eval_steps_per_second": 2.041, "eval_weighted avg": { "f1-score": 0.9004246846165758, "precision": 0.8996509302731299, "recall": 0.9020073386574574, "support": 32431.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 0.1318705528974533, "learning_rate": 1.506172839506173e-05, "loss": 0.0258, "step": 1000 }, { "epoch": 13.0, "eval_B-Claim": { "f1-score": 0.6857142857142857, "precision": 0.6551724137931034, "recall": 0.7192429022082019, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.9022082018927444, "precision": 0.8827160493827161, "recall": 0.9225806451612903, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8858560794044665, "precision": 0.9049429657794676, "recall": 0.8675577156743621, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6909411894516165, "precision": 0.6634880271243908, "recall": 0.7207642725598526, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.9048498845265589, "precision": 0.8840252707581228, "recall": 0.9266792809839167, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.906777130915062, "precision": 0.9211464098870271, "recall": 0.8928492687587272, "support": 13607.0 }, "eval_O": { "f1-score": 0.9971466099008107, "precision": 1.0, "recall": 0.9943094571402764, "support": 11071.0 }, "eval_accuracy": 0.9044432795781814, "eval_loss": 0.5739177465438843, "eval_macro avg": { "f1-score": 0.8533561974007922, "precision": 0.8444987338178326, "recall": 0.8634262203552325, "support": 32431.0 }, "eval_runtime": 4.929, "eval_samples_per_second": 16.231, "eval_steps_per_second": 2.029, "eval_weighted avg": { "f1-score": 0.9058771229581195, "precision": 0.9079380628166581, "recall": 0.9044432795781814, "support": 32431.0 }, "step": 1053 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1869280565334000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }