{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "eval_steps": 500, "global_step": 492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.778263585259213, "precision": 0.643595041322314, "recall": 0.9842022116903634, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5213290460878884, "precision": 0.4563708012760368, "recall": 0.6078480379905024, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.520462355513902, "precision": 0.7011784511784511, "recall": 0.4138102334823646, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8648793805666204, "precision": 0.8778050331607159, "recall": 0.8523288637967537, "support": 11336.0 }, "eval_O": { "f1-score": 0.9947169811320755, "precision": 0.9991081780076697, "recall": 0.9903642149929278, "support": 11312.0 }, "eval_accuracy": 0.8332940582393537, "eval_loss": 0.4083092212677002, "eval_macro avg": { "f1-score": 0.5256644783656713, "precision": 0.5254367864207411, "recall": 0.5497933659932731, "support": 29705.0 }, "eval_runtime": 1.4074, "eval_samples_per_second": 56.843, "eval_steps_per_second": 7.105, "eval_weighted avg": { "f1-score": 0.830926787853407, "precision": 0.8381591322948091, "recall": 0.8332940582393537, "support": 29705.0 }, "step": 41 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.29931972789115646, "precision": 0.38823529411764707, "recall": 0.24354243542435425, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.2976190476190476, "precision": 0.8620689655172413, "recall": 0.17985611510791366, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8474576271186441, "precision": 0.7422802850356295, "recall": 0.9873617693522907, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.42762951334379906, "precision": 0.5749261291684254, "recall": 0.340414896275931, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7763546798029557, "precision": 0.7699071812408402, "recall": 0.7829110779930452, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8837899073120494, "precision": 0.8290439755777108, "recall": 0.9462773465067043, "support": 11336.0 }, "eval_O": { "f1-score": 0.9997789664471067, "precision": 0.9999115748518879, "recall": 0.9996463932107497, "support": 11312.0 }, "eval_accuracy": 0.8648039050664871, "eval_loss": 0.29589229822158813, "eval_macro avg": { "f1-score": 0.6474213527906798, "precision": 0.7380533436441974, "recall": 0.6400014334101413, "support": 29705.0 }, "eval_runtime": 1.4156, "eval_samples_per_second": 56.514, "eval_steps_per_second": 7.064, "eval_weighted avg": { "f1-score": 0.8503893311871605, "precision": 0.850161508562683, "recall": 0.8648039050664871, "support": 29705.0 }, "step": 82 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.5958254269449716, "precision": 0.61328125, "recall": 0.5793357933579336, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7816901408450705, "precision": 0.7655172413793103, "recall": 0.7985611510791367, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.88, "precision": 0.8738317757009346, "recall": 0.8862559241706162, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6080141575022121, "precision": 0.6150895140664961, "recall": 0.6010997250687328, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8229895104895104, "precision": 0.7346859149434257, "recall": 0.9354197714853453, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8927189271892718, "precision": 0.9111703104905383, "recall": 0.875, "support": 11336.0 }, "eval_O": { "f1-score": 0.9996020340481981, "precision": 1.0, "recall": 0.9992043847241867, "support": 11312.0 }, "eval_accuracy": 0.8866857431408853, "eval_loss": 0.2633354663848877, "eval_macro avg": { "f1-score": 0.7972628852884621, "precision": 0.7876537152258151, "recall": 0.8106966785551358, "support": 29705.0 }, "eval_runtime": 1.4195, "eval_samples_per_second": 56.358, "eval_steps_per_second": 7.045, "eval_weighted avg": { "f1-score": 0.8868495578802252, "precision": 0.8889636142603039, "recall": 0.8866857431408853, "support": 29705.0 }, "step": 123 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.6368515205724509, "precision": 0.6180555555555556, "recall": 0.6568265682656826, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8129032258064516, "precision": 0.7368421052631579, "recall": 0.9064748201438849, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8759244042728019, "precision": 0.9126712328767124, "recall": 0.8420221169036335, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6030037546933668, "precision": 0.6039107545750815, "recall": 0.6020994751312172, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8346312414109024, "precision": 0.7743306417339566, "recall": 0.905116741182315, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8875363616021481, "precision": 0.9007175946952494, "recall": 0.8747353563867325, "support": 11336.0 }, "eval_O": { "f1-score": 0.9999557971975424, "precision": 1.0, "recall": 0.9999115983026874, "support": 11312.0 }, "eval_accuracy": 0.8852045110250799, "eval_loss": 0.27674925327301025, "eval_macro avg": { "f1-score": 0.8072580436508092, "precision": 0.7923611263856734, "recall": 0.8267409537594504, "support": 29705.0 }, "eval_runtime": 1.4101, "eval_samples_per_second": 56.735, "eval_steps_per_second": 7.092, "eval_weighted avg": { "f1-score": 0.8855540596827394, "precision": 0.8868925824921324, "recall": 0.8852045110250799, "support": 29705.0 }, "step": 164 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.6735042735042734, "precision": 0.6273885350318471, "recall": 0.7269372693726938, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8514851485148515, "precision": 0.7865853658536586, "recall": 0.9280575539568345, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8740617180984154, "precision": 0.9257950530035336, "recall": 0.8278041074249605, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6534240561896401, "precision": 0.5824691841126981, "recall": 0.744063984003999, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8683019766611098, "precision": 0.8339432753888381, "recall": 0.9056135121708893, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8769748527624568, "precision": 0.9326903957049115, "recall": 0.8275405786873676, "support": 11336.0 }, "eval_O": { "f1-score": 0.9997347245556637, "precision": 1.0, "recall": 0.9994695898161244, "support": 11312.0 }, "eval_accuracy": 0.8866184144083488, "eval_loss": 0.2875027358531952, "eval_macro avg": { "f1-score": 0.8282123928980587, "precision": 0.8126959727279266, "recall": 0.8513552279189812, "support": 29705.0 }, "eval_runtime": 1.4103, "eval_samples_per_second": 56.726, "eval_steps_per_second": 7.091, "eval_weighted avg": { "f1-score": 0.8909875382676978, "precision": 0.90084333519953, "recall": 0.8866184144083488, "support": 29705.0 }, "step": 205 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.6584070796460177, "precision": 0.6326530612244898, "recall": 0.6863468634686347, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8421052631578947, "precision": 0.8818897637795275, "recall": 0.8057553956834532, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8780876494023905, "precision": 0.8858520900321544, "recall": 0.8704581358609794, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6172492982903802, "precision": 0.6304404482668752, "recall": 0.6045988502874281, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8752545824847251, "precision": 0.8976501305483029, "recall": 0.8539493293591655, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8908354033206956, "precision": 0.8802859357505813, "recall": 0.9016407904022583, "support": 11336.0 }, "eval_O": { "f1-score": 0.9994250585997965, "precision": 1.0, "recall": 0.9988507779349364, "support": 11312.0 }, "eval_accuracy": 0.8923413566739606, "eval_loss": 0.2945018410682678, "eval_macro avg": { "f1-score": 0.8230520478431285, "precision": 0.829824489943133, "recall": 0.8173714489995507, "support": 29705.0 }, "eval_runtime": 1.4167, "eval_samples_per_second": 56.47, "eval_steps_per_second": 7.059, "eval_weighted avg": { "f1-score": 0.8916619674856285, "precision": 0.8912660947222903, "recall": 0.8923413566739606, "support": 29705.0 }, "step": 246 }, { "epoch": 7.0, "eval_B-Claim": { "f1-score": 0.6748681898066783, "precision": 0.6442953020134228, "recall": 0.7084870848708487, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8705035971223022, "precision": 0.8705035971223022, "recall": 0.8705035971223022, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8788368336025849, "precision": 0.8991735537190083, "recall": 0.8593996840442338, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6305652826413206, "precision": 0.6310387984981226, "recall": 0.6300924768807799, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8747152619589977, "precision": 0.891640866873065, "recall": 0.8584202682563339, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8918871407225103, "precision": 0.8886845331932037, "recall": 0.8951129146083274, "support": 11336.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_accuracy": 0.8942938899175223, "eval_loss": 0.30371928215026855, "eval_macro avg": { "f1-score": 0.8316251865506278, "precision": 0.832190950202732, "recall": 0.8317165751118323, "support": 29705.0 }, "eval_runtime": 1.4098, "eval_samples_per_second": 56.745, "eval_steps_per_second": 7.093, "eval_weighted avg": { "f1-score": 0.894338297946804, "precision": 0.8944813348740749, "recall": 0.8942938899175223, "support": 29705.0 }, "step": 287 }, { "epoch": 8.0, "eval_B-Claim": { "f1-score": 0.6325757575757576, "precision": 0.6498054474708171, "recall": 0.6162361623616236, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8602941176470588, "precision": 0.8796992481203008, "recall": 0.841726618705036, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8780108780108781, "precision": 0.863914373088685, "recall": 0.8925750394944708, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5998663994655979, "precision": 0.644374282433984, "recall": 0.5611097225693576, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8755122950819673, "precision": 0.9037546271813856, "recall": 0.8489816194734228, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8923684097636309, "precision": 0.8685594989561587, "recall": 0.9175194071983063, "support": 11336.0 }, "eval_O": { "f1-score": 0.9999557971975424, "precision": 1.0, "recall": 0.9999115983026874, "support": 11312.0 }, "eval_accuracy": 0.892610671604107, "eval_loss": 0.3549477458000183, "eval_macro avg": { "f1-score": 0.8197976649632047, "precision": 0.8300153538930474, "recall": 0.811151452586415, "support": 29705.0 }, "eval_runtime": 1.4225, "eval_samples_per_second": 56.239, "eval_steps_per_second": 7.03, "eval_weighted avg": { "f1-score": 0.8899730612246367, "precision": 0.8887602531095763, "recall": 0.892610671604107, "support": 29705.0 }, "step": 328 }, { "epoch": 9.0, "eval_B-Claim": { "f1-score": 0.6642728904847396, "precision": 0.6468531468531469, "recall": 0.6826568265682657, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8480565371024734, "precision": 0.8333333333333334, "recall": 0.8633093525179856, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8805132317562149, "precision": 0.8941368078175895, "recall": 0.8672985781990521, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.637490882567469, "precision": 0.6205917159763313, "recall": 0.6553361659585104, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8618255168935955, "precision": 0.8750640040962622, "recall": 0.8489816194734228, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8898067954696869, "precision": 0.8960551033187226, "recall": 0.8836450247000706, "support": 11336.0 }, "eval_O": { "f1-score": 0.9996462681287585, "precision": 1.0, "recall": 0.9992927864214993, "support": 11312.0 }, "eval_accuracy": 0.8923076923076924, "eval_loss": 0.3715985417366028, "eval_macro avg": { "f1-score": 0.8259445889147053, "precision": 0.8237191587707694, "recall": 0.8286457648341152, "support": 29705.0 }, "eval_runtime": 1.4176, "eval_samples_per_second": 56.435, "eval_steps_per_second": 7.054, "eval_weighted avg": { "f1-score": 0.8933030430182268, "precision": 0.8945056752252882, "recall": 0.8923076923076924, "support": 29705.0 }, "step": 369 }, { "epoch": 10.0, "eval_B-Claim": { "f1-score": 0.6461538461538461, "precision": 0.6746987951807228, "recall": 0.6199261992619927, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.852233676975945, "precision": 0.8157894736842105, "recall": 0.8920863309352518, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8847058823529412, "precision": 0.8785046728971962, "recall": 0.8909952606635071, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6189843126827972, "precision": 0.6611758023288838, "recall": 0.5818545363659086, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8606089438629877, "precision": 0.8256503879507074, "recall": 0.8986587183308494, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8974191582887234, "precision": 0.8856627437505369, "recall": 0.9094918842625265, "support": 11336.0 }, "eval_O": { "f1-score": 0.9998673798682641, "precision": 1.0, "recall": 0.9997347949080623, "support": 11312.0 }, "eval_accuracy": 0.8958761151321326, "eval_loss": 0.4143347144126892, "eval_macro avg": { "f1-score": 0.822853314312215, "precision": 0.8202116965417511, "recall": 0.8275353892468712, "support": 29705.0 }, "eval_runtime": 1.4266, "eval_samples_per_second": 56.076, "eval_steps_per_second": 7.01, "eval_weighted avg": { "f1-score": 0.8936607445011815, "precision": 0.8924963153509083, "recall": 0.8958761151321326, "support": 29705.0 }, "step": 410 }, { "epoch": 11.0, "eval_B-Claim": { "f1-score": 0.6748681898066783, "precision": 0.6442953020134228, "recall": 0.7084870848708487, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8754448398576513, "precision": 0.8661971830985915, "recall": 0.8848920863309353, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8770226537216829, "precision": 0.8988391376451078, "recall": 0.8562401263823065, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6444880923152466, "precision": 0.6332931242460796, "recall": 0.6560859785053736, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8760289348964829, "precision": 0.8797595190380761, "recall": 0.8723298559364133, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8931358637814828, "precision": 0.8979136947218259, "recall": 0.8884086097388849, "support": 11336.0 }, "eval_O": { "f1-score": 0.9996904982977407, "precision": 1.0, "recall": 0.9993811881188119, "support": 11312.0 }, "eval_accuracy": 0.8959434438646693, "eval_loss": 0.4241807460784912, "eval_macro avg": { "f1-score": 0.8343827246681379, "precision": 0.8314711372518719, "recall": 0.8379749899833678, "support": 29705.0 }, "eval_runtime": 1.4169, "eval_samples_per_second": 56.462, "eval_steps_per_second": 7.058, "eval_weighted avg": { "f1-score": 0.8966457372110589, "precision": 0.8974745650471141, "recall": 0.8959434438646693, "support": 29705.0 }, "step": 451 }, { "epoch": 12.0, "eval_B-Claim": { "f1-score": 0.6175869120654396, "precision": 0.6926605504587156, "recall": 0.5571955719557196, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.8695652173913043, "precision": 0.8759124087591241, "recall": 0.8633093525179856, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8819969742813918, "precision": 0.8461538461538461, "recall": 0.9210110584518167, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5798017526217497, "precision": 0.6817567567567567, "recall": 0.5043739065233691, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8793675082887019, "precision": 0.9035639412997903, "recall": 0.8564331843020367, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8959295006294586, "precision": 0.8544101168560909, "recall": 0.9416901905434015, "support": 11336.0 }, "eval_O": { "f1-score": 0.9994250585997965, "precision": 1.0, "recall": 0.9988507779349364, "support": 11312.0 }, "eval_accuracy": 0.8944622117488639, "eval_loss": 0.49107131361961365, "eval_macro avg": { "f1-score": 0.8176675605539775, "precision": 0.8363510886120462, "recall": 0.8061234346041807, "support": 29705.0 }, "eval_runtime": 1.4189, "eval_samples_per_second": 56.383, "eval_steps_per_second": 7.048, "eval_weighted avg": { "f1-score": 0.8886802353660483, "precision": 0.888377522333214, "recall": 0.8944622117488639, "support": 29705.0 }, "step": 492 } ], "logging_steps": 500, "max_steps": 656, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "total_flos": 1725489752616000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }