diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 9600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 1.569076418876648, + "learning_rate": 4.9500000000000004e-05, + "loss": 1.1639, + "step": 96 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.0, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.0, + "eval_PERSON_recall": 0.0, + "eval_loss": 0.6982586979866028, + "eval_overall_accuracy": 0.8373377520022093, + "eval_overall_f1": 0.0, + "eval_overall_precision": 0.0, + "eval_overall_recall": 0.0, + "eval_runtime": 0.5919, + "eval_samples_per_second": 287.192, + "eval_steps_per_second": 5.068, + "step": 96 + }, + { + "epoch": 2.0, + "grad_norm": 0.9749519228935242, + "learning_rate": 4.9e-05, + "loss": 0.6685, + "step": 192 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.0, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.0, + "eval_PERSON_recall": 0.0, + "eval_loss": 0.565019428730011, + "eval_overall_accuracy": 0.8376139188069595, + "eval_overall_f1": 0.0, + "eval_overall_precision": 0.0, + "eval_overall_recall": 0.0, + "eval_runtime": 0.6878, + "eval_samples_per_second": 247.148, + "eval_steps_per_second": 4.361, + "step": 192 + }, + { + "epoch": 3.0, + "grad_norm": 1.0692148208618164, + "learning_rate": 4.85e-05, + "loss": 0.553, + "step": 288 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.07407407407407407, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.3888888888888889, + "eval_ORGANIZATION_recall": 0.04093567251461988, + "eval_PERSON_f1": 0.10526315789473685, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.225, + "eval_PERSON_recall": 0.06870229007633588, + "eval_loss": 0.4425106644630432, + "eval_overall_accuracy": 0.8470035901684617, + "eval_overall_f1": 0.06866952789699571, + "eval_overall_precision": 0.26229508196721313, + "eval_overall_recall": 0.03950617283950617, + "eval_runtime": 0.6213, + "eval_samples_per_second": 273.628, + "eval_steps_per_second": 4.829, + "step": 288 + }, + { + "epoch": 4.0, + "grad_norm": 0.8610777258872986, + "learning_rate": 4.8e-05, + "loss": 0.4403, + "step": 384 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.078125, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.2, + "eval_LOCATION_recall": 0.04854368932038835, + "eval_ORGANIZATION_f1": 0.29078014184397166, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.36936936936936937, + "eval_ORGANIZATION_recall": 0.23976608187134502, + "eval_PERSON_f1": 0.4290657439446367, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.3924050632911392, + "eval_PERSON_recall": 0.4732824427480916, + "eval_loss": 0.33291852474212646, + "eval_overall_accuracy": 0.8834576083954708, + "eval_overall_f1": 0.3090128755364807, + "eval_overall_precision": 0.3673469387755102, + "eval_overall_recall": 0.26666666666666666, + "eval_runtime": 0.6305, + "eval_samples_per_second": 269.628, + "eval_steps_per_second": 4.758, + "step": 384 + }, + { + "epoch": 5.0, + "grad_norm": 0.5931413769721985, + "learning_rate": 4.75e-05, + "loss": 0.3288, + "step": 480 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.3715846994535519, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.425, + "eval_LOCATION_recall": 0.3300970873786408, + "eval_ORGANIZATION_f1": 0.5683646112600537, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.5247524752475248, + "eval_ORGANIZATION_recall": 0.6198830409356725, + "eval_PERSON_f1": 0.6107382550335572, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.5449101796407185, + "eval_PERSON_recall": 0.6946564885496184, + "eval_loss": 0.24550960958003998, + "eval_overall_accuracy": 0.9262634631317316, + "eval_overall_f1": 0.540983606557377, + "eval_overall_precision": 0.5144766146993318, + "eval_overall_recall": 0.5703703703703704, + "eval_runtime": 0.6072, + "eval_samples_per_second": 279.961, + "eval_steps_per_second": 4.94, + "step": 480 + }, + { + "epoch": 6.0, + "grad_norm": 1.293260097503662, + "learning_rate": 4.7e-05, + "loss": 0.2474, + "step": 576 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.626865671641791, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.6428571428571429, + "eval_LOCATION_recall": 0.6116504854368932, + "eval_ORGANIZATION_f1": 0.6863270777479892, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.6336633663366337, + "eval_ORGANIZATION_recall": 0.7485380116959064, + "eval_PERSON_f1": 0.833922261484099, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.7763157894736842, + "eval_PERSON_recall": 0.9007633587786259, + "eval_loss": 0.18931446969509125, + "eval_overall_accuracy": 0.9505661419497377, + "eval_overall_f1": 0.721120186697783, + "eval_overall_precision": 0.6836283185840708, + "eval_overall_recall": 0.762962962962963, + "eval_runtime": 0.5962, + "eval_samples_per_second": 285.148, + "eval_steps_per_second": 5.032, + "step": 576 + }, + { + "epoch": 7.0, + "grad_norm": 1.2653672695159912, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.1962, + "step": 672 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.7551020408163266, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.7956989247311828, + "eval_LOCATION_recall": 0.7184466019417476, + "eval_ORGANIZATION_f1": 0.7526881720430108, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.6965174129353234, + "eval_ORGANIZATION_recall": 0.8187134502923976, + "eval_PERSON_f1": 0.9157509157509158, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.8802816901408451, + "eval_PERSON_recall": 0.9541984732824428, + "eval_loss": 0.1533629298210144, + "eval_overall_accuracy": 0.9605081469207402, + "eval_overall_f1": 0.8061831153388822, + "eval_overall_precision": 0.7775229357798165, + "eval_overall_recall": 0.837037037037037, + "eval_runtime": 0.5884, + "eval_samples_per_second": 288.903, + "eval_steps_per_second": 5.098, + "step": 672 + }, + { + "epoch": 8.0, + "grad_norm": 0.9401953816413879, + "learning_rate": 4.600000000000001e-05, + "loss": 0.1659, + "step": 768 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.7980295566502462, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.81, + "eval_LOCATION_recall": 0.7864077669902912, + "eval_ORGANIZATION_f1": 0.7899159663865545, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.7580645161290323, + "eval_ORGANIZATION_recall": 0.8245614035087719, + "eval_PERSON_f1": 0.9328358208955224, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9124087591240876, + "eval_PERSON_recall": 0.9541984732824428, + "eval_loss": 0.12766310572624207, + "eval_overall_accuracy": 0.9652029826014913, + "eval_overall_f1": 0.8381642512077295, + "eval_overall_precision": 0.8203309692671394, + "eval_overall_recall": 0.8567901234567902, + "eval_runtime": 0.5896, + "eval_samples_per_second": 288.354, + "eval_steps_per_second": 5.089, + "step": 768 + }, + { + "epoch": 9.0, + "grad_norm": 1.0895642042160034, + "learning_rate": 4.55e-05, + "loss": 0.1495, + "step": 864 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.835820895522388, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8571428571428571, + "eval_LOCATION_recall": 0.8155339805825242, + "eval_ORGANIZATION_f1": 0.8099173553719009, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.765625, + "eval_ORGANIZATION_recall": 0.8596491228070176, + "eval_PERSON_f1": 0.9338235294117646, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.900709219858156, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.11188717931509018, + "eval_overall_accuracy": 0.9690693178679922, + "eval_overall_f1": 0.8564593301435406, + "eval_overall_precision": 0.8306264501160093, + "eval_overall_recall": 0.8839506172839506, + "eval_runtime": 0.6012, + "eval_samples_per_second": 282.766, + "eval_steps_per_second": 4.99, + "step": 864 + }, + { + "epoch": 10.0, + "grad_norm": 1.217275857925415, + "learning_rate": 4.5e-05, + "loss": 0.1342, + "step": 960 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.8374384236453202, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.85, + "eval_LOCATION_recall": 0.8252427184466019, + "eval_ORGANIZATION_f1": 0.8277777777777778, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.7883597883597884, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.929889298892989, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.10086647421121597, + "eval_overall_accuracy": 0.9707263186964927, + "eval_overall_f1": 0.8633093525179856, + "eval_overall_precision": 0.8391608391608392, + "eval_overall_recall": 0.8888888888888888, + "eval_runtime": 0.6119, + "eval_samples_per_second": 277.843, + "eval_steps_per_second": 4.903, + "step": 960 + }, + { + "epoch": 11.0, + "grad_norm": 2.0349202156066895, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.1241, + "step": 1056 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.8365384615384616, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8285714285714286, + "eval_LOCATION_recall": 0.8446601941747572, + "eval_ORGANIZATION_f1": 0.8342857142857143, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8156424581005587, + "eval_ORGANIZATION_recall": 0.8538011695906432, + "eval_PERSON_f1": 0.9438202247191011, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9264705882352942, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.09149681031703949, + "eval_overall_accuracy": 0.9726594863297432, + "eval_overall_f1": 0.8703030303030304, + "eval_overall_precision": 0.8547619047619047, + "eval_overall_recall": 0.8864197530864197, + "eval_runtime": 0.5981, + "eval_samples_per_second": 284.231, + "eval_steps_per_second": 5.016, + "step": 1056 + }, + { + "epoch": 12.0, + "grad_norm": 3.014051914215088, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.1188, + "step": 1152 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.8309178743961353, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8269230769230769, + "eval_LOCATION_recall": 0.8349514563106796, + "eval_ORGANIZATION_f1": 0.8579545454545455, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8342541436464088, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9402985074626866, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9197080291970803, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.08539355546236038, + "eval_overall_accuracy": 0.9745926539629937, + "eval_overall_f1": 0.8778718258766627, + "eval_overall_precision": 0.8601895734597157, + "eval_overall_recall": 0.8962962962962963, + "eval_runtime": 0.5994, + "eval_samples_per_second": 283.636, + "eval_steps_per_second": 5.005, + "step": 1152 + }, + { + "epoch": 13.0, + "grad_norm": 2.8539180755615234, + "learning_rate": 4.35e-05, + "loss": 0.1102, + "step": 1248 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.8695652173913043, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8653846153846154, + "eval_LOCATION_recall": 0.8737864077669902, + "eval_ORGANIZATION_f1": 0.866096866096866, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8444444444444444, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9473684210526315, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9333333333333333, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.07981602847576141, + "eval_overall_accuracy": 0.9762496547914941, + "eval_overall_f1": 0.8932038834951457, + "eval_overall_precision": 0.8782816229116945, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.598, + "eval_samples_per_second": 284.265, + "eval_steps_per_second": 5.016, + "step": 1248 + }, + { + "epoch": 14.0, + "grad_norm": 0.624473512172699, + "learning_rate": 4.3e-05, + "loss": 0.1044, + "step": 1344 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.8768472906403941, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.89, + "eval_LOCATION_recall": 0.8640776699029126, + "eval_ORGANIZATION_f1": 0.8830409356725146, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8830409356725146, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9509433962264152, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.07802519202232361, + "eval_overall_accuracy": 0.9781828224247445, + "eval_overall_f1": 0.9037037037037037, + "eval_overall_precision": 0.9037037037037037, + "eval_overall_recall": 0.9037037037037037, + "eval_runtime": 0.5951, + "eval_samples_per_second": 285.656, + "eval_steps_per_second": 5.041, + "step": 1344 + }, + { + "epoch": 15.0, + "grad_norm": 0.996662437915802, + "learning_rate": 4.25e-05, + "loss": 0.1009, + "step": 1440 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.891089108910891, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.9090909090909091, + "eval_LOCATION_recall": 0.8737864077669902, + "eval_ORGANIZATION_f1": 0.8788732394366197, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8478260869565217, + "eval_ORGANIZATION_recall": 0.9122807017543859, + "eval_PERSON_f1": 0.9584905660377357, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9477611940298507, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.07211567461490631, + "eval_overall_accuracy": 0.9781828224247445, + "eval_overall_f1": 0.9075425790754258, + "eval_overall_precision": 0.894484412470024, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.5925, + "eval_samples_per_second": 286.903, + "eval_steps_per_second": 5.063, + "step": 1440 + }, + { + "epoch": 16.0, + "grad_norm": 1.0493874549865723, + "learning_rate": 4.2e-05, + "loss": 0.0978, + "step": 1536 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.8932038834951457, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8725212464589235, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8461538461538461, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.9509433962264152, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.06955698877573013, + "eval_overall_accuracy": 0.9779066556199945, + "eval_overall_f1": 0.9029126213592233, + "eval_overall_precision": 0.8878281622911695, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.6569, + "eval_samples_per_second": 258.806, + "eval_steps_per_second": 4.567, + "step": 1536 + }, + { + "epoch": 17.0, + "grad_norm": 0.8617897629737854, + "learning_rate": 4.15e-05, + "loss": 0.0962, + "step": 1632 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.9064039408866995, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.92, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8869565217391304, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8793103448275862, + "eval_ORGANIZATION_recall": 0.8947368421052632, + "eval_PERSON_f1": 0.962121212121212, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9548872180451128, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.06796535849571228, + "eval_overall_accuracy": 0.980115990057995, + "eval_overall_f1": 0.916256157635468, + "eval_overall_precision": 0.914004914004914, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.6013, + "eval_samples_per_second": 282.726, + "eval_steps_per_second": 4.989, + "step": 1632 + }, + { + "epoch": 18.0, + "grad_norm": 0.8590739965438843, + "learning_rate": 4.1e-05, + "loss": 0.0909, + "step": 1728 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.896551724137931, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.91, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8831908831908832, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8611111111111112, + "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_PERSON_f1": 0.9509433962264152, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.0644010454416275, + "eval_overall_accuracy": 0.9798398232532449, + "eval_overall_f1": 0.9084249084249084, + "eval_overall_precision": 0.8985507246376812, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.5942, + "eval_samples_per_second": 286.117, + "eval_steps_per_second": 5.049, + "step": 1728 + }, + { + "epoch": 19.0, + "grad_norm": 2.493396520614624, + "learning_rate": 4.05e-05, + "loss": 0.088, + "step": 1824 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.9009900990099011, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8908045977011494, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8757062146892656, + "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.0634172335267067, + "eval_overall_accuracy": 0.9806683236674951, + "eval_overall_f1": 0.914004914004914, + "eval_overall_precision": 0.9095354523227384, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.5943, + "eval_samples_per_second": 286.044, + "eval_steps_per_second": 5.048, + "step": 1824 + }, + { + "epoch": 20.0, + "grad_norm": 0.5532075762748718, + "learning_rate": 4e-05, + "loss": 0.0836, + "step": 1920 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.8888888888888888, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8846153846153846, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8863636363636364, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.861878453038674, + "eval_ORGANIZATION_recall": 0.9122807017543859, + "eval_PERSON_f1": 0.9509433962264152, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.06357116252183914, + "eval_overall_accuracy": 0.9798398232532449, + "eval_overall_f1": 0.9077669902912622, + "eval_overall_precision": 0.8926014319809069, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.5958, + "eval_samples_per_second": 285.346, + "eval_steps_per_second": 5.036, + "step": 1920 + }, + { + "epoch": 21.0, + "grad_norm": 1.6820136308670044, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.0836, + "step": 2016 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.8888888888888888, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8846153846153846, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8760806916426512, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8636363636363636, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05993271991610527, + "eval_overall_accuracy": 0.9806683236674951, + "eval_overall_f1": 0.9046454767726161, + "eval_overall_precision": 0.8958837772397095, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.5978, + "eval_samples_per_second": 284.362, + "eval_steps_per_second": 5.018, + "step": 2016 + }, + { + "epoch": 22.0, + "grad_norm": 0.9439449906349182, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.0827, + "step": 2112 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.8761904761904761, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8598130841121495, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8757062146892655, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8469945355191257, + "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_PERSON_f1": 0.9509433962264152, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05957403779029846, + "eval_overall_accuracy": 0.9798398232532449, + "eval_overall_f1": 0.8998793727382389, + "eval_overall_precision": 0.8797169811320755, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.5967, + "eval_samples_per_second": 284.901, + "eval_steps_per_second": 5.028, + "step": 2112 + }, + { + "epoch": 23.0, + "grad_norm": 1.140651822090149, + "learning_rate": 3.85e-05, + "loss": 0.077, + "step": 2208 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.8750000000000001, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8666666666666667, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8818443804034583, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8693181818181818, + "eval_ORGANIZATION_recall": 0.8947368421052632, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.058286890387535095, + "eval_overall_accuracy": 0.9806683236674951, + "eval_overall_f1": 0.9070904645476773, + "eval_overall_precision": 0.8983050847457628, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.6001, + "eval_samples_per_second": 283.301, + "eval_steps_per_second": 4.999, + "step": 2208 + }, + { + "epoch": 24.0, + "grad_norm": 0.9434022307395935, + "learning_rate": 3.8e-05, + "loss": 0.0774, + "step": 2304 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.861111111111111, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8230088495575221, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8529411764705882, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8579881656804734, + "eval_ORGANIZATION_recall": 0.847953216374269, + "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05800532177090645, + "eval_overall_accuracy": 0.9787351560342447, + "eval_overall_f1": 0.8878048780487805, + "eval_overall_precision": 0.8771084337349397, + "eval_overall_recall": 0.8987654320987655, + "eval_runtime": 0.6032, + "eval_samples_per_second": 281.813, + "eval_steps_per_second": 4.973, + "step": 2304 + }, + { + "epoch": 25.0, + "grad_norm": 1.0291742086410522, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0782, + "step": 2400 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.8792270531400966, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.875, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8816568047337278, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8922155688622755, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05705862119793892, + "eval_overall_accuracy": 0.9798398232532449, + "eval_overall_f1": 0.9048207663782447, + "eval_overall_precision": 0.905940594059406, + "eval_overall_recall": 0.9037037037037037, + "eval_runtime": 0.6018, + "eval_samples_per_second": 282.474, + "eval_steps_per_second": 4.985, + "step": 2400 + }, + { + "epoch": 26.0, + "grad_norm": 0.6928281784057617, + "learning_rate": 3.7e-05, + "loss": 0.0771, + "step": 2496 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.8975609756097561, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.9019607843137255, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8977272727272728, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8729281767955801, + "eval_ORGANIZATION_recall": 0.9239766081871345, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.055970560759305954, + "eval_overall_accuracy": 0.9820491576912456, + "eval_overall_f1": 0.9195121951219513, + "eval_overall_precision": 0.908433734939759, + "eval_overall_recall": 0.9308641975308642, + "eval_runtime": 0.6049, + "eval_samples_per_second": 281.02, + "eval_steps_per_second": 4.959, + "step": 2496 + }, + { + "epoch": 27.0, + "grad_norm": 1.4192240238189697, + "learning_rate": 3.65e-05, + "loss": 0.0726, + "step": 2592 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.9064039408866995, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.92, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.9008498583569404, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8736263736263736, + "eval_ORGANIZATION_recall": 0.9298245614035088, + "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05344228819012642, + "eval_overall_accuracy": 0.9826014913007457, + "eval_overall_f1": 0.9195121951219513, + "eval_overall_precision": 0.908433734939759, + "eval_overall_recall": 0.9308641975308642, + "eval_runtime": 0.605, + "eval_samples_per_second": 281.0, + "eval_steps_per_second": 4.959, + "step": 2592 + }, + { + "epoch": 28.0, + "grad_norm": 1.0541571378707886, + "learning_rate": 3.6e-05, + "loss": 0.0712, + "step": 2688 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.8932038834951457, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8939828080229225, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8764044943820225, + "eval_ORGANIZATION_recall": 0.9122807017543859, + "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05282684788107872, + "eval_overall_accuracy": 0.9826014913007457, + "eval_overall_f1": 0.9133089133089134, + "eval_overall_precision": 0.9033816425120773, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.6016, + "eval_samples_per_second": 282.587, + "eval_steps_per_second": 4.987, + "step": 2688 + }, + { + "epoch": 29.0, + "grad_norm": 0.6951781511306763, + "learning_rate": 3.55e-05, + "loss": 0.0703, + "step": 2784 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.9009900990099011, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.9008498583569404, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8736263736263736, + "eval_ORGANIZATION_recall": 0.9298245614035088, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.05054891109466553, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9217603911980441, + "eval_overall_precision": 0.9128329297820823, + "eval_overall_recall": 0.9308641975308642, + "eval_runtime": 0.5939, + "eval_samples_per_second": 286.258, + "eval_steps_per_second": 5.052, + "step": 2784 + }, + { + "epoch": 30.0, + "grad_norm": 0.4239547550678253, + "learning_rate": 3.5e-05, + "loss": 0.0649, + "step": 2880 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.9054726368159205, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.9285714285714286, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.9065155807365438, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8791208791208791, + "eval_ORGANIZATION_recall": 0.935672514619883, + "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05008954927325249, + "eval_overall_accuracy": 0.983706158519746, + "eval_overall_f1": 0.9217603911980441, + "eval_overall_precision": 0.9128329297820823, + "eval_overall_recall": 0.9308641975308642, + "eval_runtime": 0.606, + "eval_samples_per_second": 280.539, + "eval_steps_per_second": 4.951, + "step": 2880 + }, + { + "epoch": 31.0, + "grad_norm": 1.2522234916687012, + "learning_rate": 3.45e-05, + "loss": 0.0642, + "step": 2976 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.8720379146919431, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8518518518518519, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8786127167630058, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8685714285714285, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.05007793754339218, + "eval_overall_accuracy": 0.9820491576912456, + "eval_overall_f1": 0.9048780487804877, + "eval_overall_precision": 0.8939759036144578, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.5983, + "eval_samples_per_second": 284.148, + "eval_steps_per_second": 5.014, + "step": 2976 + }, + { + "epoch": 32.0, + "grad_norm": 0.8345080018043518, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.0664, + "step": 3072 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.8625592417061612, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8425925925925926, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8835820895522388, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.9024390243902439, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05058050900697708, + "eval_overall_accuracy": 0.9817729908864954, + "eval_overall_f1": 0.9012345679012346, + "eval_overall_precision": 0.9012345679012346, + "eval_overall_recall": 0.9012345679012346, + "eval_runtime": 0.6023, + "eval_samples_per_second": 282.266, + "eval_steps_per_second": 4.981, + "step": 3072 + }, + { + "epoch": 33.0, + "grad_norm": 0.7092554569244385, + "learning_rate": 3.35e-05, + "loss": 0.0659, + "step": 3168 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.8584905660377358, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8348623853211009, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8842729970326408, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8975903614457831, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.0509335920214653, + "eval_overall_accuracy": 0.9820491576912456, + "eval_overall_f1": 0.9039408866995073, + "eval_overall_precision": 0.9017199017199017, + "eval_overall_recall": 0.9061728395061729, + "eval_runtime": 0.5971, + "eval_samples_per_second": 284.704, + "eval_steps_per_second": 5.024, + "step": 3168 + }, + { + "epoch": 34.0, + "grad_norm": 0.8459410071372986, + "learning_rate": 3.3e-05, + "loss": 0.0667, + "step": 3264 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.883495145631068, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.883495145631068, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8908045977011494, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8757062146892656, + "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.050714682787656784, + "eval_overall_accuracy": 0.9826014913007457, + "eval_overall_f1": 0.9130966952264382, + "eval_overall_precision": 0.9053398058252428, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.5977, + "eval_samples_per_second": 284.432, + "eval_steps_per_second": 5.019, + "step": 3264 + }, + { + "epoch": 35.0, + "grad_norm": 1.9158438444137573, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0639, + "step": 3360 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.8761904761904761, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8598130841121495, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8934911242603552, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.9041916167664671, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.0509558841586113, + "eval_overall_accuracy": 0.9826014913007457, + "eval_overall_f1": 0.9124537607891491, + "eval_overall_precision": 0.9113300492610837, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.6048, + "eval_samples_per_second": 281.07, + "eval_steps_per_second": 4.96, + "step": 3360 + }, + { + "epoch": 36.0, + "grad_norm": 1.0001205205917358, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.0632, + "step": 3456 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.8846153846153847, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8761904761904762, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.9032258064516129, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.9058823529411765, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.050493232905864716, + "eval_overall_accuracy": 0.983706158519746, + "eval_overall_f1": 0.91871921182266, + "eval_overall_precision": 0.9164619164619164, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.5977, + "eval_samples_per_second": 284.414, + "eval_steps_per_second": 5.019, + "step": 3456 + }, + { + "epoch": 37.0, + "grad_norm": 2.2751259803771973, + "learning_rate": 3.15e-05, + "loss": 0.0632, + "step": 3552 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.9054726368159205, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.9285714285714286, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.9132947976878613, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.9028571428571428, + "eval_ORGANIZATION_recall": 0.9239766081871345, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04872192069888115, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.928395061728395, + "eval_overall_precision": 0.928395061728395, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.5916, + "eval_samples_per_second": 287.366, + "eval_steps_per_second": 5.071, + "step": 3552 + }, + { + "epoch": 38.0, + "grad_norm": 0.6949302554130554, + "learning_rate": 3.1e-05, + "loss": 0.0605, + "step": 3648 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.8803827751196173, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8679245283018868, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8922155688622756, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.9141104294478528, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.05016703903675079, + "eval_overall_accuracy": 0.9831538249102458, + "eval_overall_f1": 0.913151364764268, + "eval_overall_precision": 0.9177057356608479, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.5914, + "eval_samples_per_second": 287.442, + "eval_steps_per_second": 5.073, + "step": 3648 + }, + { + "epoch": 39.0, + "grad_norm": 0.5452874302864075, + "learning_rate": 3.05e-05, + "loss": 0.0609, + "step": 3744 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.8803827751196173, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8679245283018868, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8953488372093024, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8901734104046243, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.046954866498708725, + "eval_overall_accuracy": 0.9842584921292461, + "eval_overall_f1": 0.9142156862745098, + "eval_overall_precision": 0.9075425790754258, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.6085, + "eval_samples_per_second": 279.378, + "eval_steps_per_second": 4.93, + "step": 3744 + }, + { + "epoch": 40.0, + "grad_norm": 2.737624406814575, + "learning_rate": 3e-05, + "loss": 0.0601, + "step": 3840 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.8878048780487806, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8921568627450981, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.9032258064516129, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.9058823529411765, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.046938493847846985, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.919653893695921, + "eval_overall_precision": 0.9207920792079208, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.5981, + "eval_samples_per_second": 284.253, + "eval_steps_per_second": 5.016, + "step": 3840 + }, + { + "epoch": 41.0, + "grad_norm": 0.9197700619697571, + "learning_rate": 2.95e-05, + "loss": 0.0588, + "step": 3936 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.9064039408866995, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.92, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.9137931034482759, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8983050847457628, + "eval_ORGANIZATION_recall": 0.9298245614035088, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04637211561203003, + "eval_overall_accuracy": 0.9864678265672466, + "eval_overall_f1": 0.9287469287469288, + "eval_overall_precision": 0.9242053789731052, + "eval_overall_recall": 0.9333333333333333, + "eval_runtime": 0.5998, + "eval_samples_per_second": 283.424, + "eval_steps_per_second": 5.002, + "step": 3936 + }, + { + "epoch": 42.0, + "grad_norm": 0.33734437823295593, + "learning_rate": 2.9e-05, + "loss": 0.0576, + "step": 4032 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.9064039408866995, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.92, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.9111747851002865, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8932584269662921, + "eval_ORGANIZATION_recall": 0.9298245614035088, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04518849030137062, + "eval_overall_accuracy": 0.9870201601767468, + "eval_overall_f1": 0.9276073619631902, + "eval_overall_precision": 0.9219512195121952, + "eval_overall_recall": 0.9333333333333333, + "eval_runtime": 0.5992, + "eval_samples_per_second": 283.689, + "eval_steps_per_second": 5.006, + "step": 4032 + }, + { + "epoch": 43.0, + "grad_norm": 0.9250084757804871, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.057, + "step": 4128 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.8803827751196173, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8679245283018868, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8941176470588235, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8994082840236687, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04571167007088661, + "eval_overall_accuracy": 0.9853631593482464, + "eval_overall_f1": 0.9137931034482758, + "eval_overall_precision": 0.9115479115479116, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.5947, + "eval_samples_per_second": 285.867, + "eval_steps_per_second": 5.045, + "step": 4128 + }, + { + "epoch": 44.0, + "grad_norm": 1.0702989101409912, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.056, + "step": 4224 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.8932038834951457, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8979591836734693, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8953488372093024, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04532807692885399, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.91871921182266, + "eval_overall_precision": 0.9164619164619164, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.6003, + "eval_samples_per_second": 283.17, + "eval_steps_per_second": 4.997, + "step": 4224 + }, + { + "epoch": 45.0, + "grad_norm": 1.9211422204971313, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0557, + "step": 4320 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.8761904761904761, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8598130841121495, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8914956011730205, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8941176470588236, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.0455101914703846, + "eval_overall_accuracy": 0.9850869925434963, + "eval_overall_f1": 0.9115479115479117, + "eval_overall_precision": 0.9070904645476773, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.6588, + "eval_samples_per_second": 258.043, + "eval_steps_per_second": 4.554, + "step": 4320 + }, + { + "epoch": 46.0, + "grad_norm": 0.976703941822052, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.0565, + "step": 4416 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.8651162790697675, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8303571428571429, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8776119402985074, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8963414634146342, + "eval_ORGANIZATION_recall": 0.8596491228070176, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04744512960314751, + "eval_overall_accuracy": 0.9828776581054958, + "eval_overall_f1": 0.902829028290283, + "eval_overall_precision": 0.8995098039215687, + "eval_overall_recall": 0.9061728395061729, + "eval_runtime": 0.5904, + "eval_samples_per_second": 287.926, + "eval_steps_per_second": 5.081, + "step": 4416 + }, + { + "epoch": 47.0, + "grad_norm": 0.8773436546325684, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0534, + "step": 4512 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.8932038834951457, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8985507246376812, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8908045977011494, + "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04476306214928627, + "eval_overall_accuracy": 0.9859154929577465, + "eval_overall_f1": 0.920049200492005, + "eval_overall_precision": 0.9166666666666666, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.5945, + "eval_samples_per_second": 285.942, + "eval_steps_per_second": 5.046, + "step": 4512 + }, + { + "epoch": 48.0, + "grad_norm": 1.278594970703125, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.0523, + "step": 4608 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8961424332344213, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.9096385542168675, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04515359550714493, + "eval_overall_accuracy": 0.9859154929577465, + "eval_overall_f1": 0.9149198520345252, + "eval_overall_precision": 0.9137931034482759, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.5953, + "eval_samples_per_second": 285.59, + "eval_steps_per_second": 5.04, + "step": 4608 + }, + { + "epoch": 49.0, + "grad_norm": 0.56380695104599, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.0523, + "step": 4704 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8856304985337242, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.888235294117647, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04556996375322342, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9104294478527607, + "eval_overall_precision": 0.9048780487804878, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.5918, + "eval_samples_per_second": 287.26, + "eval_steps_per_second": 5.069, + "step": 4704 + }, + { + "epoch": 50.0, + "grad_norm": 1.6369190216064453, + "learning_rate": 2.5e-05, + "loss": 0.0509, + "step": 4800 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.8773584905660378, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8532110091743119, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8802395209580838, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.901840490797546, + "eval_ORGANIZATION_recall": 0.8596491228070176, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.046390656381845474, + "eval_overall_accuracy": 0.9828776581054958, + "eval_overall_f1": 0.9084158415841586, + "eval_overall_precision": 0.9106699751861043, + "eval_overall_recall": 0.9061728395061729, + "eval_runtime": 0.6054, + "eval_samples_per_second": 280.8, + "eval_steps_per_second": 4.955, + "step": 4800 + }, + { + "epoch": 51.0, + "grad_norm": 0.7033815383911133, + "learning_rate": 2.45e-05, + "loss": 0.052, + "step": 4896 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.8663594470046084, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8245614035087719, + "eval_LOCATION_recall": 0.912621359223301, + "eval_ORGANIZATION_f1": 0.8795180722891567, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.906832298136646, + "eval_ORGANIZATION_recall": 0.8538011695906432, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04676185920834541, + "eval_overall_accuracy": 0.9842584921292461, + "eval_overall_f1": 0.9050554870530209, + "eval_overall_precision": 0.9039408866995073, + "eval_overall_recall": 0.9061728395061729, + "eval_runtime": 0.5936, + "eval_samples_per_second": 286.386, + "eval_steps_per_second": 5.054, + "step": 4896 + }, + { + "epoch": 52.0, + "grad_norm": 0.6778050065040588, + "learning_rate": 2.4e-05, + "loss": 0.0511, + "step": 4992 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.8975609756097561, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.9019607843137255, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8979591836734693, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8953488372093024, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04431942105293274, + "eval_overall_accuracy": 0.9861916597624966, + "eval_overall_f1": 0.9209876543209876, + "eval_overall_precision": 0.9209876543209876, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.6009, + "eval_samples_per_second": 282.922, + "eval_steps_per_second": 4.993, + "step": 4992 + }, + { + "epoch": 53.0, + "grad_norm": 0.7755054831504822, + "learning_rate": 2.35e-05, + "loss": 0.051, + "step": 5088 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.8773584905660378, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8532110091743119, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8875739644970414, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8982035928143712, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045676685869693756, + "eval_overall_accuracy": 0.983982325324496, + "eval_overall_f1": 0.9102091020910209, + "eval_overall_precision": 0.9068627450980392, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.61, + "eval_samples_per_second": 278.668, + "eval_steps_per_second": 4.918, + "step": 5088 + }, + { + "epoch": 54.0, + "grad_norm": 0.47155851125717163, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0483, + "step": 5184 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.9029126213592235, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.9029126213592233, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.9017341040462428, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8914285714285715, + "eval_ORGANIZATION_recall": 0.9122807017543859, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.044774431735277176, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.9238329238329238, + "eval_overall_precision": 0.9193154034229829, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.5995, + "eval_samples_per_second": 283.593, + "eval_steps_per_second": 5.005, + "step": 5184 + }, + { + "epoch": 55.0, + "grad_norm": 1.5237046480178833, + "learning_rate": 2.25e-05, + "loss": 0.0525, + "step": 5280 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.8899521531100479, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8773584905660378, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8953488372093024, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8901734104046243, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04513506591320038, + "eval_overall_accuracy": 0.9853631593482464, + "eval_overall_f1": 0.9177914110429448, + "eval_overall_precision": 0.9121951219512195, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.6045, + "eval_samples_per_second": 281.204, + "eval_steps_per_second": 4.962, + "step": 5280 + }, + { + "epoch": 56.0, + "grad_norm": 0.6528781652450562, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0479, + "step": 5376 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8691588785046729, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8914956011730205, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8941176470588236, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04449770227074623, + "eval_overall_accuracy": 0.9850869925434963, + "eval_overall_f1": 0.915129151291513, + "eval_overall_precision": 0.9117647058823529, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.5981, + "eval_samples_per_second": 284.219, + "eval_steps_per_second": 5.016, + "step": 5376 + }, + { + "epoch": 57.0, + "grad_norm": 1.4258739948272705, + "learning_rate": 2.15e-05, + "loss": 0.0486, + "step": 5472 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8691588785046729, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8888888888888888, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8888888888888888, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.044527389109134674, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9128834355828221, + "eval_overall_precision": 0.9073170731707317, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.597, + "eval_samples_per_second": 284.74, + "eval_steps_per_second": 5.025, + "step": 5472 + }, + { + "epoch": 58.0, + "grad_norm": 0.6324487924575806, + "learning_rate": 2.1e-05, + "loss": 0.0457, + "step": 5568 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8691588785046729, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8947368421052632, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8947368421052632, + "eval_ORGANIZATION_recall": 0.8947368421052632, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04367503523826599, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.9164619164619164, + "eval_overall_precision": 0.9119804400977995, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.6033, + "eval_samples_per_second": 281.778, + "eval_steps_per_second": 4.973, + "step": 5568 + }, + { + "epoch": 59.0, + "grad_norm": 1.5328294038772583, + "learning_rate": 2.05e-05, + "loss": 0.0478, + "step": 5664 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.8773584905660378, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8532110091743119, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8816568047337278, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8922155688622755, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04407254979014397, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9088669950738916, + "eval_overall_precision": 0.9066339066339066, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.6034, + "eval_samples_per_second": 281.724, + "eval_steps_per_second": 4.972, + "step": 5664 + }, + { + "epoch": 60.0, + "grad_norm": 0.5337828993797302, + "learning_rate": 2e-05, + "loss": 0.0472, + "step": 5760 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8849557522123894, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8928571428571429, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04403013363480568, + "eval_overall_accuracy": 0.9850869925434963, + "eval_overall_f1": 0.9113300492610836, + "eval_overall_precision": 0.9090909090909091, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.5975, + "eval_samples_per_second": 284.5, + "eval_steps_per_second": 5.021, + "step": 5760 + }, + { + "epoch": 61.0, + "grad_norm": 0.9366702437400818, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.049, + "step": 5856 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.8942307692307693, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8857142857142857, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8895348837209303, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.884393063583815, + "eval_ORGANIZATION_recall": 0.8947368421052632, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04451928660273552, + "eval_overall_accuracy": 0.9853631593482464, + "eval_overall_f1": 0.9164619164619164, + "eval_overall_precision": 0.9119804400977995, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.6062, + "eval_samples_per_second": 280.441, + "eval_steps_per_second": 4.949, + "step": 5856 + }, + { + "epoch": 62.0, + "grad_norm": 1.8338637351989746, + "learning_rate": 1.9e-05, + "loss": 0.0476, + "step": 5952 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.869158878504673, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8378378378378378, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.875739644970414, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8862275449101796, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045562200248241425, + "eval_overall_accuracy": 0.983982325324496, + "eval_overall_f1": 0.9041769041769042, + "eval_overall_precision": 0.8997555012224939, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.6542, + "eval_samples_per_second": 259.871, + "eval_steps_per_second": 4.586, + "step": 5952 + }, + { + "epoch": 63.0, + "grad_norm": 2.0841386318206787, + "learning_rate": 1.85e-05, + "loss": 0.0457, + "step": 6048 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8691588785046729, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8979591836734693, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8953488372093024, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.043963029980659485, + "eval_overall_accuracy": 0.9859154929577465, + "eval_overall_f1": 0.9177914110429448, + "eval_overall_precision": 0.9121951219512195, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.6065, + "eval_samples_per_second": 280.291, + "eval_steps_per_second": 4.946, + "step": 6048 + }, + { + "epoch": 64.0, + "grad_norm": 0.9337228536605835, + "learning_rate": 1.8e-05, + "loss": 0.0446, + "step": 6144 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8691588785046729, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8882352941176469, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.893491124260355, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04478294402360916, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9137931034482758, + "eval_overall_precision": 0.9115479115479116, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.5976, + "eval_samples_per_second": 284.482, + "eval_steps_per_second": 5.02, + "step": 6144 + }, + { + "epoch": 65.0, + "grad_norm": 1.0935461521148682, + "learning_rate": 1.75e-05, + "loss": 0.0465, + "step": 6240 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.8826291079812206, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8545454545454545, + "eval_LOCATION_recall": 0.912621359223301, + "eval_ORGANIZATION_f1": 0.8908554572271385, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8988095238095238, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04469531029462814, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.914004914004914, + "eval_overall_precision": 0.9095354523227384, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.603, + "eval_samples_per_second": 281.935, + "eval_steps_per_second": 4.975, + "step": 6240 + }, + { + "epoch": 66.0, + "grad_norm": 0.588843584060669, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0456, + "step": 6336 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8691588785046729, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8842729970326408, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8975903614457831, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04511041194200516, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9122373300370827, + "eval_overall_precision": 0.9133663366336634, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.5959, + "eval_samples_per_second": 285.269, + "eval_steps_per_second": 5.034, + "step": 6336 + }, + { + "epoch": 67.0, + "grad_norm": 0.40730783343315125, + "learning_rate": 1.65e-05, + "loss": 0.043, + "step": 6432 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.874251497005988, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8957055214723927, + "eval_ORGANIZATION_recall": 0.8538011695906432, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04662081599235535, + "eval_overall_accuracy": 0.9823253244959956, + "eval_overall_f1": 0.9070631970260223, + "eval_overall_precision": 0.9104477611940298, + "eval_overall_recall": 0.9037037037037037, + "eval_runtime": 0.5968, + "eval_samples_per_second": 284.867, + "eval_steps_per_second": 5.027, + "step": 6432 + }, + { + "epoch": 68.0, + "grad_norm": 1.010898470878601, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0441, + "step": 6528 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.8909952606635072, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8703703703703703, + "eval_LOCATION_recall": 0.912621359223301, + "eval_ORGANIZATION_f1": 0.879765395894428, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8823529411764706, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04551886394619942, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9115479115479117, + "eval_overall_precision": 0.9070904645476773, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.6631, + "eval_samples_per_second": 256.37, + "eval_steps_per_second": 4.524, + "step": 6528 + }, + { + "epoch": 69.0, + "grad_norm": 0.356332927942276, + "learning_rate": 1.55e-05, + "loss": 0.0439, + "step": 6624 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8691588785046729, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.879765395894428, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8823529411764706, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04542705416679382, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9102091020910209, + "eval_overall_precision": 0.9068627450980392, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.5976, + "eval_samples_per_second": 284.455, + "eval_steps_per_second": 5.02, + "step": 6624 + }, + { + "epoch": 70.0, + "grad_norm": 0.5880185961723328, + "learning_rate": 1.5e-05, + "loss": 0.0439, + "step": 6720 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8882352941176469, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.893491124260355, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.044994134455919266, + "eval_overall_accuracy": 0.9850869925434963, + "eval_overall_f1": 0.9126691266912669, + "eval_overall_precision": 0.9093137254901961, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.597, + "eval_samples_per_second": 284.742, + "eval_steps_per_second": 5.025, + "step": 6720 + }, + { + "epoch": 71.0, + "grad_norm": 0.9274646639823914, + "learning_rate": 1.45e-05, + "loss": 0.0428, + "step": 6816 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.8952380952380954, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8785046728971962, + "eval_LOCATION_recall": 0.912621359223301, + "eval_ORGANIZATION_f1": 0.8888888888888888, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8888888888888888, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04472970589995384, + "eval_overall_accuracy": 0.9859154929577465, + "eval_overall_f1": 0.9164619164619164, + "eval_overall_precision": 0.9119804400977995, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.5987, + "eval_samples_per_second": 283.931, + "eval_steps_per_second": 5.011, + "step": 6816 + }, + { + "epoch": 72.0, + "grad_norm": 0.35504868626594543, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0438, + "step": 6912 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8795180722891567, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.906832298136646, + "eval_ORGANIZATION_recall": 0.8538011695906432, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.046420272439718246, + "eval_overall_accuracy": 0.9828776581054958, + "eval_overall_f1": 0.9093167701863354, + "eval_overall_precision": 0.915, + "eval_overall_recall": 0.9037037037037037, + "eval_runtime": 0.5929, + "eval_samples_per_second": 286.713, + "eval_steps_per_second": 5.06, + "step": 6912 + }, + { + "epoch": 73.0, + "grad_norm": 1.1146135330200195, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0431, + "step": 7008 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.8867924528301887, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8623853211009175, + "eval_LOCATION_recall": 0.912621359223301, + "eval_ORGANIZATION_f1": 0.8823529411764705, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8875739644970414, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.044833190739154816, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.9115479115479117, + "eval_overall_precision": 0.9070904645476773, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.606, + "eval_samples_per_second": 280.509, + "eval_steps_per_second": 4.95, + "step": 7008 + }, + { + "epoch": 74.0, + "grad_norm": 0.3997597396373749, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0415, + "step": 7104 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.8773584905660378, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8532110091743119, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.875739644970414, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8862275449101796, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04576844349503517, + "eval_overall_accuracy": 0.9842584921292461, + "eval_overall_f1": 0.9064039408866995, + "eval_overall_precision": 0.9041769041769042, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.6179, + "eval_samples_per_second": 275.116, + "eval_steps_per_second": 4.855, + "step": 7104 + }, + { + "epoch": 75.0, + "grad_norm": 0.6900985836982727, + "learning_rate": 1.25e-05, + "loss": 0.0429, + "step": 7200 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8809523809523809, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.046131208539009094, + "eval_overall_accuracy": 0.983982325324496, + "eval_overall_f1": 0.9064039408866995, + "eval_overall_precision": 0.9041769041769042, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.6007, + "eval_samples_per_second": 283.003, + "eval_steps_per_second": 4.994, + "step": 7200 + }, + { + "epoch": 76.0, + "grad_norm": 0.35220813751220703, + "learning_rate": 1.2e-05, + "loss": 0.0426, + "step": 7296 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8809523809523809, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04544881358742714, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9064039408866995, + "eval_overall_precision": 0.9041769041769042, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.5968, + "eval_samples_per_second": 284.861, + "eval_steps_per_second": 5.027, + "step": 7296 + }, + { + "epoch": 77.0, + "grad_norm": 0.7620628476142883, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.043, + "step": 7392 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8691588785046729, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.875739644970414, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8862275449101796, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04560285061597824, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.908641975308642, + "eval_overall_precision": 0.908641975308642, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.5922, + "eval_samples_per_second": 287.086, + "eval_steps_per_second": 5.066, + "step": 7392 + }, + { + "epoch": 78.0, + "grad_norm": 0.6972180008888245, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0397, + "step": 7488 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8842729970326408, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8975903614457831, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045004814863204956, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.9111111111111111, + "eval_overall_precision": 0.9111111111111111, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.5949, + "eval_samples_per_second": 285.753, + "eval_steps_per_second": 5.043, + "step": 7488 + }, + { + "epoch": 79.0, + "grad_norm": 1.175458312034607, + "learning_rate": 1.05e-05, + "loss": 0.0411, + "step": 7584 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8809523809523809, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04489603638648987, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9064039408866995, + "eval_overall_precision": 0.9041769041769042, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.5924, + "eval_samples_per_second": 286.953, + "eval_steps_per_second": 5.064, + "step": 7584 + }, + { + "epoch": 80.0, + "grad_norm": 1.935158610343933, + "learning_rate": 1e-05, + "loss": 0.0417, + "step": 7680 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8823529411764705, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8875739644970414, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04481157660484314, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.9102091020910209, + "eval_overall_precision": 0.9068627450980392, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.6085, + "eval_samples_per_second": 279.37, + "eval_steps_per_second": 4.93, + "step": 7680 + }, + { + "epoch": 81.0, + "grad_norm": 0.7792730927467346, + "learning_rate": 9.5e-06, + "loss": 0.0423, + "step": 7776 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8823529411764705, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8875739644970414, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04457651078701019, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.9102091020910209, + "eval_overall_precision": 0.9068627450980392, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.6034, + "eval_samples_per_second": 281.758, + "eval_steps_per_second": 4.972, + "step": 7776 + }, + { + "epoch": 82.0, + "grad_norm": 1.640648365020752, + "learning_rate": 9e-06, + "loss": 0.0434, + "step": 7872 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8994082840236686, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.9101796407185628, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04452081397175789, + "eval_overall_accuracy": 0.9861916597624966, + "eval_overall_f1": 0.9173859432799014, + "eval_overall_precision": 0.916256157635468, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.6038, + "eval_samples_per_second": 281.535, + "eval_steps_per_second": 4.968, + "step": 7872 + }, + { + "epoch": 83.0, + "grad_norm": 0.8310399651527405, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0394, + "step": 7968 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8809523809523809, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04489120468497276, + "eval_overall_accuracy": 0.9850869925434963, + "eval_overall_f1": 0.9064039408866995, + "eval_overall_precision": 0.9041769041769042, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.6046, + "eval_samples_per_second": 281.181, + "eval_steps_per_second": 4.962, + "step": 7968 + }, + { + "epoch": 84.0, + "grad_norm": 0.9918228387832642, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0413, + "step": 8064 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8809523809523809, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.896969696969697, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04544991999864578, + "eval_overall_accuracy": 0.9850869925434963, + "eval_overall_f1": 0.9097651421508034, + "eval_overall_precision": 0.9108910891089109, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.6004, + "eval_samples_per_second": 283.156, + "eval_steps_per_second": 4.997, + "step": 8064 + }, + { + "epoch": 85.0, + "grad_norm": 0.9799830317497253, + "learning_rate": 7.5e-06, + "loss": 0.0408, + "step": 8160 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8783382789317508, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.891566265060241, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045652709901332855, + "eval_overall_accuracy": 0.9842584921292461, + "eval_overall_f1": 0.908641975308642, + "eval_overall_precision": 0.908641975308642, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.604, + "eval_samples_per_second": 281.458, + "eval_steps_per_second": 4.967, + "step": 8160 + }, + { + "epoch": 86.0, + "grad_norm": 1.889784574508667, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0412, + "step": 8256 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8809523809523809, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045468661934137344, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9064039408866995, + "eval_overall_precision": 0.9041769041769042, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.6007, + "eval_samples_per_second": 283.007, + "eval_steps_per_second": 4.994, + "step": 8256 + }, + { + "epoch": 87.0, + "grad_norm": 1.0306001901626587, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0425, + "step": 8352 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8809523809523809, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045380450785160065, + "eval_overall_accuracy": 0.9842584921292461, + "eval_overall_f1": 0.9064039408866995, + "eval_overall_precision": 0.9041769041769042, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.607, + "eval_samples_per_second": 280.052, + "eval_steps_per_second": 4.942, + "step": 8352 + }, + { + "epoch": 88.0, + "grad_norm": 1.6095170974731445, + "learning_rate": 6e-06, + "loss": 0.0404, + "step": 8448 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8816568047337278, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045171529054641724, + "eval_overall_accuracy": 0.9850869925434963, + "eval_overall_f1": 0.9077490774907748, + "eval_overall_precision": 0.9044117647058824, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.5928, + "eval_samples_per_second": 286.772, + "eval_steps_per_second": 5.061, + "step": 8448 + }, + { + "epoch": 89.0, + "grad_norm": 1.9292471408843994, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0403, + "step": 8544 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.875739644970414, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8862275449101796, + "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04589909315109253, + "eval_overall_accuracy": 0.9842584921292461, + "eval_overall_f1": 0.9075215782983971, + "eval_overall_precision": 0.9064039408866995, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.6049, + "eval_samples_per_second": 281.046, + "eval_steps_per_second": 4.96, + "step": 8544 + }, + { + "epoch": 90.0, + "grad_norm": 0.9496196508407593, + "learning_rate": 5e-06, + "loss": 0.0395, + "step": 8640 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8764705882352941, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04514380544424057, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9066339066339066, + "eval_overall_precision": 0.902200488997555, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.6057, + "eval_samples_per_second": 280.676, + "eval_steps_per_second": 4.953, + "step": 8640 + }, + { + "epoch": 91.0, + "grad_norm": 1.498864769935608, + "learning_rate": 4.5e-06, + "loss": 0.0413, + "step": 8736 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8764705882352941, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04504216089844704, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9066339066339066, + "eval_overall_precision": 0.902200488997555, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.6, + "eval_samples_per_second": 283.336, + "eval_steps_per_second": 5.0, + "step": 8736 + }, + { + "epoch": 92.0, + "grad_norm": 0.8939065337181091, + "learning_rate": 4.000000000000001e-06, + "loss": 0.039, + "step": 8832 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8764705882352941, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04519949480891228, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9066339066339066, + "eval_overall_precision": 0.902200488997555, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.6015, + "eval_samples_per_second": 282.646, + "eval_steps_per_second": 4.988, + "step": 8832 + }, + { + "epoch": 93.0, + "grad_norm": 76.55709838867188, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0402, + "step": 8928 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8816568047337278, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045382946729660034, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9077490774907748, + "eval_overall_precision": 0.9044117647058824, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.5985, + "eval_samples_per_second": 284.026, + "eval_steps_per_second": 5.012, + "step": 8928 + }, + { + "epoch": 94.0, + "grad_norm": 0.24397915601730347, + "learning_rate": 3e-06, + "loss": 0.0397, + "step": 9024 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8764705882352941, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045313529670238495, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9066339066339066, + "eval_overall_precision": 0.902200488997555, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.5887, + "eval_samples_per_second": 288.767, + "eval_steps_per_second": 5.096, + "step": 9024 + }, + { + "epoch": 95.0, + "grad_norm": 0.29766303300857544, + "learning_rate": 2.5e-06, + "loss": 0.0409, + "step": 9120 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8816568047337278, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045457012951374054, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9077490774907748, + "eval_overall_precision": 0.9044117647058824, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.5948, + "eval_samples_per_second": 285.825, + "eval_steps_per_second": 5.044, + "step": 9120 + }, + { + "epoch": 96.0, + "grad_norm": 1.5634249448776245, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.039, + "step": 9216 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8764705882352941, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.0454791858792305, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9066339066339066, + "eval_overall_precision": 0.902200488997555, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.5934, + "eval_samples_per_second": 286.492, + "eval_steps_per_second": 5.056, + "step": 9216 + }, + { + "epoch": 97.0, + "grad_norm": 0.9605298042297363, + "learning_rate": 1.5e-06, + "loss": 0.0402, + "step": 9312 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8764705882352941, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.0456123985350132, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9066339066339066, + "eval_overall_precision": 0.902200488997555, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.6745, + "eval_samples_per_second": 252.035, + "eval_steps_per_second": 4.448, + "step": 9312 + }, + { + "epoch": 98.0, + "grad_norm": 1.7611069679260254, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0382, + "step": 9408 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8816568047337278, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04547755792737007, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9077490774907748, + "eval_overall_precision": 0.9044117647058824, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.5989, + "eval_samples_per_second": 283.847, + "eval_steps_per_second": 5.009, + "step": 9408 + }, + { + "epoch": 99.0, + "grad_norm": 0.4898216426372528, + "learning_rate": 5.000000000000001e-07, + "loss": 0.0392, + "step": 9504 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8816568047337278, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.04550032317638397, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9077490774907748, + "eval_overall_precision": 0.9044117647058824, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.5984, + "eval_samples_per_second": 284.085, + "eval_steps_per_second": 5.013, + "step": 9504 + }, + { + "epoch": 100.0, + "grad_norm": 1.0116997957229614, + "learning_rate": 0.0, + "loss": 0.0412, + "step": 9600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.8611111111111112, + "eval_LOCATION_recall": 0.9029126213592233, + "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.8816568047337278, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.9694656488549618, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.045390497893095016, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9077490774907748, + "eval_overall_precision": 0.9044117647058824, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.6032, + "eval_samples_per_second": 281.85, + "eval_steps_per_second": 4.974, + "step": 9600 + }, + { + "epoch": 100.0, + "step": 9600, + "total_flos": 3889996718638620.0, + "train_loss": 0.09181262865662575, + "train_runtime": 1037.4922, + "train_samples_per_second": 147.567, + "train_steps_per_second": 9.253 + } + ], + "logging_steps": 500, + "max_steps": 9600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 3889996718638620.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}