{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 567, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.18764302059496568, "precision": 0.3416666666666667, "recall": 0.12933753943217666, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.19540229885057472, "precision": 0.8947368421052632, "recall": 0.10967741935483871, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8476339053562143, "precision": 0.740909090909091, "recall": 0.9902794653705954, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.4315998169894769, "precision": 0.6394035246272028, "recall": 0.3257366482504604, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.7779799818016379, "precision": 0.7493426818580193, "recall": 0.8088930936613056, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.8948683715038855, "precision": 0.8376057421174058, "recall": 0.9605350187403542, "support": 13607.0 }, "eval_O": { "f1-score": 0.9980147987727848, "precision": 0.9970251509961237, "recall": 0.9990064131514769, "support": 11071.0 }, "eval_accuracy": 0.8673183065585397, "eval_loss": 0.3193369209766388, "eval_macro avg": { "f1-score": 0.6190203134099342, "precision": 0.7429556713256817, "recall": 0.6176379425658869, "support": 32431.0 }, "eval_runtime": 4.9083, "eval_samples_per_second": 16.299, "eval_steps_per_second": 2.037, "eval_weighted avg": { "f1-score": 0.8489527906249091, "precision": 0.8526967051825685, "recall": 0.8673183065585397, "support": 32431.0 }, "step": 81 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.583941605839416, "precision": 0.6926406926406926, "recall": 0.5047318611987381, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8117647058823529, "precision": 0.745945945945946, "recall": 0.8903225806451613, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8943661971830986, "precision": 0.8649262202043133, "recall": 0.9258809234507898, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6197740770932805, "precision": 0.6271506009898656, "recall": 0.6125690607734806, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8298737427776589, "precision": 0.7577178585384916, "recall": 0.9172185430463576, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9013697613125952, "precision": 0.9106660666066607, "recall": 0.8922613360770192, "support": 13607.0 }, "eval_O": { "f1-score": 0.9961487925331883, "precision": 0.9993636363636363, "recall": 0.9929545659831993, "support": 11071.0 }, "eval_accuracy": 0.887854213561099, "eval_loss": 0.26407390832901, "eval_macro avg": { "f1-score": 0.8053198403745129, "precision": 0.7997730030413723, "recall": 0.8194198387392494, "support": 32431.0 }, "eval_runtime": 4.9329, "eval_samples_per_second": 16.218, "eval_steps_per_second": 2.027, "eval_weighted avg": { "f1-score": 0.8876368199002541, "precision": 0.8889201304482091, "recall": 0.887854213561099, "support": 32431.0 }, "step": 162 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.5895522388059701, "precision": 0.7214611872146118, "recall": 0.49842271293375395, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8668941979522183, "precision": 0.9202898550724637, "recall": 0.8193548387096774, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8929178470254956, "precision": 0.8365180467091295, "recall": 0.9574726609963548, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6548140728684112, "precision": 0.7178149876475433, "recall": 0.6019797421731123, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8815622622368754, "precision": 0.9502460360852925, "recall": 0.8221381267738883, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9169943670953343, "precision": 0.8852257181942544, "recall": 0.9511280958330272, "support": 13607.0 }, "eval_O": { "f1-score": 0.9984170774727511, "precision": 0.9998188405797102, "recall": 0.9970192394544305, "support": 11071.0 }, "eval_accuracy": 0.9067250470229101, "eval_loss": 0.25323203206062317, "eval_macro avg": { "f1-score": 0.8287360090652937, "precision": 0.8616249530718579, "recall": 0.8067879166963207, "support": 32431.0 }, "eval_runtime": 4.9025, "eval_samples_per_second": 16.318, "eval_steps_per_second": 2.04, "eval_weighted avg": { "f1-score": 0.9033110548907601, "precision": 0.9034896128542845, "recall": 0.9067250470229101, "support": 32431.0 }, "step": 243 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.6806136680613668, "precision": 0.61, "recall": 0.7697160883280757, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8861538461538462, "precision": 0.8470588235294118, "recall": 0.9290322580645162, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8687258687258688, "precision": 0.9233926128590971, "recall": 0.8201701093560145, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.656155854589275, "precision": 0.5819380121125757, "recall": 0.7520718232044199, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8818897637795275, "precision": 0.8408408408408409, "recall": 0.9271523178807947, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.8829758349171871, "precision": 0.9349433218334154, "recall": 0.836481222899978, "support": 13607.0 }, "eval_O": { "f1-score": 0.9971017118014671, "precision": 0.9998183634547271, "recall": 0.9943997832174148, "support": 11071.0 }, "eval_accuracy": 0.8843698930036077, "eval_loss": 0.2791365385055542, "eval_macro avg": { "f1-score": 0.8362309354326484, "precision": 0.8197131392328668, "recall": 0.8612890861358877, "support": 32431.0 }, "eval_runtime": 4.9234, "eval_samples_per_second": 16.249, "eval_steps_per_second": 2.031, "eval_weighted avg": { "f1-score": 0.889158243622726, "precision": 0.8997827813567029, "recall": 0.8843698930036077, "support": 32431.0 }, "step": 324 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.6078799249530958, "precision": 0.75, "recall": 0.5110410094637224, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8971962616822431, "precision": 0.8674698795180723, "recall": 0.9290322580645162, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.893739230327398, "precision": 0.8474945533769063, "recall": 0.945321992709599, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6119813135476778, "precision": 0.7590320381731425, "recall": 0.5126611418047882, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8889393595275948, "precision": 0.854958497160332, "recall": 0.9257332071901608, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9137743463765573, "precision": 0.874462654486835, "recall": 0.956786947894466, "support": 13607.0 }, "eval_O": { "f1-score": 0.9976913675252366, "precision": 1.0, "recall": 0.9953933700659381, "support": 11071.0 }, "eval_accuracy": 0.9036724121982054, "eval_loss": 0.3500836193561554, "eval_macro avg": { "f1-score": 0.8301716862771149, "precision": 0.8504882318164697, "recall": 0.8251385610275985, "support": 32431.0 }, "eval_runtime": 4.9239, "eval_samples_per_second": 16.247, "eval_steps_per_second": 2.031, "eval_weighted avg": { "f1-score": 0.8968007164885599, "precision": 0.8986502613295283, "recall": 0.9036724121982054, "support": 32431.0 }, "step": 405 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.672077922077922, "precision": 0.6923076923076923, "recall": 0.6529968454258676, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8910256410256411, "precision": 0.8853503184713376, "recall": 0.896774193548387, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8907563025210083, "precision": 0.8801897983392646, "recall": 0.9015795868772782, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6666666666666667, "precision": 0.6998228296633764, "recall": 0.6365101289134438, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.8853370396108408, "precision": 0.867453472537449, "recall": 0.9039735099337748, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9098497495826378, "precision": 0.8987595898759589, "recall": 0.9212170206511354, "support": 13607.0 }, "eval_O": { "f1-score": 0.998190209030857, "precision": 1.0, "recall": 0.9963869569144612, "support": 11071.0 }, "eval_accuracy": 0.9043816101877833, "eval_loss": 0.30672115087509155, "eval_macro avg": { "f1-score": 0.8448433615022248, "precision": 0.8462691001707254, "recall": 0.8442054631806213, "support": 32431.0 }, "eval_runtime": 4.9018, "eval_samples_per_second": 16.321, "eval_steps_per_second": 2.04, "eval_weighted avg": { "f1-score": 0.9029367568413356, "precision": 0.9020793621628027, "recall": 0.9043816101877833, "support": 32431.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 0.43724608421325684, "learning_rate": 1.7530864197530865e-05, "loss": 0.2434, "step": 500 }, { "epoch": 7.0, "eval_B-Claim": { "f1-score": 0.6763285024154589, "precision": 0.6907894736842105, "recall": 0.6624605678233438, "support": 317.0 }, "eval_B-MajorClaim": { "f1-score": 0.8952380952380952, "precision": 0.88125, "recall": 0.9096774193548387, "support": 155.0 }, "eval_B-Premise": { "f1-score": 0.8941034897713597, "precision": 0.8855780691299165, "recall": 0.9027946537059538, "support": 823.0 }, "eval_I-Claim": { "f1-score": 0.6960393719240684, "precision": 0.7088305489260143, "recall": 0.6837016574585635, "support": 4344.0 }, "eval_I-MajorClaim": { "f1-score": 0.9140037593984962, "precision": 0.9080298786181139, "recall": 0.9200567644276254, "support": 2114.0 }, "eval_I-Premise": { "f1-score": 0.9164355568530141, "precision": 0.9101253895774444, "recall": 0.9228338355258323, "support": 13607.0 }, "eval_O": { "f1-score": 0.9967376529225193, "precision": 1.0, "recall": 0.9934965224460302, "support": 11071.0 }, "eval_accuracy": 0.9116277635595572, "eval_loss": 0.34577852487564087, "eval_macro avg": { "f1-score": 0.855555204074716, "precision": 0.8549433371336713, "recall": 0.8564316315345982, "support": 32431.0 }, "eval_runtime": 4.8989, "eval_samples_per_second": 16.33, "eval_steps_per_second": 2.041, "eval_weighted avg": { "f1-score": 0.9111536914902465, "precision": 0.910801887328957, "recall": 0.9116277635595572, "support": 32431.0 }, "step": 567 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1006535689026000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }