{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 500, "global_step": 486, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.37160751565762, "precision": 0.42788461538461536, "recall": 0.3284132841328413, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.3578947368421052, "precision": 0.6666666666666666, "recall": 0.2446043165467626, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8640915593705293, "precision": 0.7895424836601307, "recall": 0.9541864139020537, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5003402749421533, "precision": 0.5493126120741183, "recall": 0.4593851537115721, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7718093699515347, "precision": 0.6502211636611093, "recall": 0.9493293591654247, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.875016720916752, "precision": 0.8846812731043188, "recall": 0.865561044460127, "support": 11336.0 }, "eval_O": { "f1-score": 0.9992483530087988, "precision": 0.9995577178239717, "recall": 0.998939179632249, "support": 11312.0 }, "eval_accuracy": 0.8614038040733883, "eval_loss": 0.31713685393333435, "eval_macro avg": { "f1-score": 0.6771440758127848, "precision": 0.7096952189107044, "recall": 0.685774107364433, "support": 29705.0 }, "eval_runtime": 4.8338, "eval_samples_per_second": 16.55, "eval_steps_per_second": 2.069, "eval_weighted avg": { "f1-score": 0.8576207231627551, "precision": 0.8601529227027923, "recall": 0.8614038040733883, "support": 29705.0 }, "step": 81 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.4708624708624709, "precision": 0.6392405063291139, "recall": 0.3726937269372694, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.796875, "precision": 0.8717948717948718, "recall": 0.7338129496402878, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8736616702355461, "precision": 0.796875, "recall": 0.966824644549763, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5100589925881107, "precision": 0.6459770114942529, "recall": 0.4213946513371657, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8401387776888176, "precision": 0.9077277970011534, "recall": 0.7819175360158966, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8912891699864469, "precision": 0.8338584492430646, "recall": 0.9572159491884262, "support": 11336.0 }, "eval_O": { "f1-score": 0.9996904982977407, "precision": 1.0, "recall": 0.9993811881188119, "support": 11312.0 }, "eval_accuracy": 0.8830499915839084, "eval_loss": 0.2966194748878479, "eval_macro avg": { "f1-score": 0.7689395113798762, "precision": 0.8136390908374939, "recall": 0.7476058065410885, "support": 29705.0 }, "eval_runtime": 4.8625, "eval_samples_per_second": 16.452, "eval_steps_per_second": 2.057, "eval_weighted avg": { "f1-score": 0.8731020208182413, "precision": 0.874440834821272, "recall": 0.8830499915839084, "support": 29705.0 }, "step": 162 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.6085192697768763, "precision": 0.6756756756756757, "recall": 0.5535055350553506, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8571428571428571, "precision": 0.851063829787234, "recall": 0.8633093525179856, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8834729626808834, "precision": 0.8529411764705882, "recall": 0.9162717219589257, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5764474423833614, "precision": 0.6584269662921348, "recall": 0.5126218445388653, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8581151832460733, "precision": 0.9070282235749861, "recall": 0.8142076502732241, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8959744247675935, "precision": 0.8563158317922328, "recall": 0.939484827099506, "support": 11336.0 }, "eval_O": { "f1-score": 0.9996020340481981, "precision": 1.0, "recall": 0.9992043847241867, "support": 11312.0 }, "eval_accuracy": 0.8918700555462044, "eval_loss": 0.2552729547023773, "eval_macro avg": { "f1-score": 0.811324882006549, "precision": 0.8287788147989789, "recall": 0.7998007594525776, "support": 29705.0 }, "eval_runtime": 4.8422, "eval_samples_per_second": 16.522, "eval_steps_per_second": 2.065, "eval_weighted avg": { "f1-score": 0.8867633844066056, "precision": 0.886070631898416, "recall": 0.8918700555462044, "support": 29705.0 }, "step": 243 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.6722408026755852, "precision": 0.6146788990825688, "recall": 0.7416974169741697, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8664259927797834, "precision": 0.8695652173913043, "recall": 0.8633093525179856, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8687035507844755, "precision": 0.9100346020761245, "recall": 0.8309636650868878, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6483151400094921, "precision": 0.6171222046532641, "recall": 0.6828292926768308, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8696993060909791, "precision": 0.9009584664536742, "recall": 0.8405365126676602, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8906159274643798, "precision": 0.9020175517958925, "recall": 0.879498941425547, "support": 11336.0 }, "eval_O": { "f1-score": 0.999557835160948, "precision": 0.9999115357395613, "recall": 0.9992043847241867, "support": 11312.0 }, "eval_accuracy": 0.8935869382258879, "eval_loss": 0.29465603828430176, "eval_macro avg": { "f1-score": 0.8307940792808061, "precision": 0.8306126395989127, "recall": 0.8340056522961811, "support": 29705.0 }, "eval_runtime": 4.8494, "eval_samples_per_second": 16.497, "eval_steps_per_second": 2.062, "eval_weighted avg": { "f1-score": 0.8954766464091573, "precision": 0.8982496227307208, "recall": 0.8935869382258879, "support": 29705.0 }, "step": 324 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.6438095238095239, "precision": 0.6653543307086615, "recall": 0.6236162361623616, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8785714285714286, "precision": 0.8723404255319149, "recall": 0.8848920863309353, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8783151326053042, "precision": 0.8674884437596302, "recall": 0.8894154818325435, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6283729628640129, "precision": 0.6748923959827834, "recall": 0.5878530367408148, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.885450461692039, "precision": 0.8896690070210632, "recall": 0.8812717337307501, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8989315871101154, "precision": 0.878494442573257, "recall": 0.9203422724064926, "support": 11336.0 }, "eval_O": { "f1-score": 0.9997347245556637, "precision": 1.0, "recall": 0.9994695898161244, "support": 11312.0 }, "eval_accuracy": 0.8995118666891095, "eval_loss": 0.3176642060279846, "eval_macro avg": { "f1-score": 0.830455117315441, "precision": 0.8354627207967587, "recall": 0.8266943481457174, "support": 29705.0 }, "eval_runtime": 4.8682, "eval_samples_per_second": 16.433, "eval_steps_per_second": 2.054, "eval_weighted avg": { "f1-score": 0.8971010593476484, "precision": 0.8958911872123141, "recall": 0.8995118666891095, "support": 29705.0 }, "step": 405 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.6150712830957231, "precision": 0.6863636363636364, "recall": 0.5571955719557196, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8686131386861314, "precision": 0.8814814814814815, "recall": 0.8561151079136691, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8804841149773072, "precision": 0.8447024673439768, "recall": 0.919431279620853, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5758052970651396, "precision": 0.6739276139410187, "recall": 0.5026243439140214, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8627552339105711, "precision": 0.8992456896551724, "recall": 0.829110779930452, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.894107779408681, "precision": 0.8521864257734432, "recall": 0.9403669724770642, "support": 11336.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_accuracy": 0.8922403635751557, "eval_loss": 0.41071853041648865, "eval_macro avg": { "f1-score": 0.8138338353062219, "precision": 0.8339867592226755, "recall": 0.8006920079731114, "support": 29705.0 }, "eval_runtime": 4.8571, "eval_samples_per_second": 16.471, "eval_steps_per_second": 2.059, "eval_weighted avg": { "f1-score": 0.8864802913843919, "precision": 0.8861194550557427, "recall": 0.8922403635751557, "support": 29705.0 }, "step": 486 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 862744876308000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }