diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -10,9 +10,9 @@ "log_history": [ { "epoch": 1.0, - "grad_norm": 1.569076418876648, + "grad_norm": 1.4739856719970703, "learning_rate": 4.9500000000000004e-05, - "loss": 1.1639, + "loss": 1.1566, "step": 96 }, { @@ -29,21 +29,21 @@ "eval_PERSON_number": 131, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, - "eval_loss": 0.6982586979866028, + "eval_loss": 0.695199191570282, "eval_overall_accuracy": 0.8373377520022093, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, - "eval_runtime": 0.5919, - "eval_samples_per_second": 287.192, - "eval_steps_per_second": 5.068, + "eval_runtime": 0.2694, + "eval_samples_per_second": 630.973, + "eval_steps_per_second": 11.135, "step": 96 }, { "epoch": 2.0, - "grad_norm": 0.9749519228935242, + "grad_norm": 0.9800453186035156, "learning_rate": 4.9e-05, - "loss": 0.6685, + "loss": 0.6676, "step": 192 }, { @@ -60,21 +60,21 @@ "eval_PERSON_number": 131, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, - "eval_loss": 0.565019428730011, + "eval_loss": 0.5652989745140076, "eval_overall_accuracy": 0.8376139188069595, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, - "eval_runtime": 0.6878, - "eval_samples_per_second": 247.148, - "eval_steps_per_second": 4.361, + "eval_runtime": 0.2734, + "eval_samples_per_second": 621.732, + "eval_steps_per_second": 10.972, "step": 192 }, { "epoch": 3.0, - "grad_norm": 1.0692148208618164, + "grad_norm": 1.1218823194503784, "learning_rate": 4.85e-05, - "loss": 0.553, + "loss": 0.5559, "step": 288 }, { @@ -83,525 +83,525 @@ "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, - "eval_ORGANIZATION_f1": 0.07407407407407407, - "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.3888888888888889, - "eval_ORGANIZATION_recall": 0.04093567251461988, - "eval_PERSON_f1": 0.10526315789473685, - "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.225, - "eval_PERSON_recall": 0.06870229007633588, - "eval_loss": 0.4425106644630432, - "eval_overall_accuracy": 0.8470035901684617, - "eval_overall_f1": 0.06866952789699571, - "eval_overall_precision": 0.26229508196721313, - "eval_overall_recall": 0.03950617283950617, - "eval_runtime": 0.6213, - "eval_samples_per_second": 273.628, - "eval_steps_per_second": 4.829, + "eval_ORGANIZATION_f1": 0.0641711229946524, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.375, + "eval_ORGANIZATION_recall": 0.03508771929824561, + "eval_PERSON_f1": 0.08588957055214724, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.21875, + "eval_PERSON_recall": 0.05343511450381679, + "eval_loss": 0.44871625304222107, + "eval_overall_accuracy": 0.8456227561447114, + "eval_overall_f1": 0.05714285714285715, + "eval_overall_precision": 0.26, + "eval_overall_recall": 0.03209876543209877, + "eval_runtime": 0.2727, + "eval_samples_per_second": 623.419, + "eval_steps_per_second": 11.002, "step": 288 }, { "epoch": 4.0, - "grad_norm": 0.8610777258872986, + "grad_norm": 0.906775176525116, "learning_rate": 4.8e-05, - "loss": 0.4403, + "loss": 0.4455, "step": 384 }, { "epoch": 4.0, - "eval_LOCATION_f1": 0.078125, + "eval_LOCATION_f1": 0.07874015748031495, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.2, + "eval_LOCATION_precision": 0.20833333333333334, "eval_LOCATION_recall": 0.04854368932038835, - "eval_ORGANIZATION_f1": 0.29078014184397166, - "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.36936936936936937, - "eval_ORGANIZATION_recall": 0.23976608187134502, - "eval_PERSON_f1": 0.4290657439446367, - "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.3924050632911392, - "eval_PERSON_recall": 0.4732824427480916, - "eval_loss": 0.33291852474212646, - "eval_overall_accuracy": 0.8834576083954708, - "eval_overall_f1": 0.3090128755364807, - "eval_overall_precision": 0.3673469387755102, - "eval_overall_recall": 0.26666666666666666, - "eval_runtime": 0.6305, - "eval_samples_per_second": 269.628, - "eval_steps_per_second": 4.758, + "eval_ORGANIZATION_f1": 0.2807017543859649, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.3508771929824561, + "eval_ORGANIZATION_recall": 0.23391812865497075, + "eval_PERSON_f1": 0.4098939929328622, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.3815789473684211, + "eval_PERSON_recall": 0.44274809160305345, + "eval_loss": 0.33893293142318726, + "eval_overall_accuracy": 0.8818006075669704, + "eval_overall_f1": 0.2964028776978417, + "eval_overall_precision": 0.35517241379310344, + "eval_overall_recall": 0.254320987654321, + "eval_runtime": 0.2771, + "eval_samples_per_second": 613.432, + "eval_steps_per_second": 10.825, "step": 384 }, { "epoch": 5.0, - "grad_norm": 0.5931413769721985, + "grad_norm": 0.6574228405952454, "learning_rate": 4.75e-05, - "loss": 0.3288, + "loss": 0.3416, "step": 480 }, { "epoch": 5.0, - "eval_LOCATION_f1": 0.3715846994535519, - "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.425, - "eval_LOCATION_recall": 0.3300970873786408, - "eval_ORGANIZATION_f1": 0.5683646112600537, - "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.5247524752475248, - "eval_ORGANIZATION_recall": 0.6198830409356725, - "eval_PERSON_f1": 0.6107382550335572, - "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.5449101796407185, - "eval_PERSON_recall": 0.6946564885496184, - "eval_loss": 0.24550960958003998, - "eval_overall_accuracy": 0.9262634631317316, - "eval_overall_f1": 0.540983606557377, - "eval_overall_precision": 0.5144766146993318, - "eval_overall_recall": 0.5703703703703704, - "eval_runtime": 0.6072, - "eval_samples_per_second": 279.961, - "eval_steps_per_second": 4.94, + "eval_LOCATION_f1": 0.3157894736842105, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.39705882352941174, + "eval_LOCATION_recall": 0.2621359223300971, + "eval_ORGANIZATION_f1": 0.5245901639344263, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.49230769230769234, + "eval_ORGANIZATION_recall": 0.5614035087719298, + "eval_PERSON_f1": 0.5847176079734219, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.5176470588235295, + "eval_PERSON_recall": 0.6717557251908397, + "eval_loss": 0.25828996300697327, + "eval_overall_accuracy": 0.9207401270367301, + "eval_overall_f1": 0.503579952267303, + "eval_overall_precision": 0.48729792147806006, + "eval_overall_recall": 0.5209876543209877, + "eval_runtime": 0.2793, + "eval_samples_per_second": 608.705, + "eval_steps_per_second": 10.742, "step": 480 }, { "epoch": 6.0, - "grad_norm": 1.293260097503662, + "grad_norm": 1.1292903423309326, "learning_rate": 4.7e-05, - "loss": 0.2474, + "loss": 0.2637, "step": 576 }, { "epoch": 6.0, - "eval_LOCATION_f1": 0.626865671641791, - "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.6428571428571429, - "eval_LOCATION_recall": 0.6116504854368932, - "eval_ORGANIZATION_f1": 0.6863270777479892, - "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.6336633663366337, - "eval_ORGANIZATION_recall": 0.7485380116959064, - "eval_PERSON_f1": 0.833922261484099, - "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.7763157894736842, - "eval_PERSON_recall": 0.9007633587786259, - "eval_loss": 0.18931446969509125, - "eval_overall_accuracy": 0.9505661419497377, - "eval_overall_f1": 0.721120186697783, - "eval_overall_precision": 0.6836283185840708, - "eval_overall_recall": 0.762962962962963, - "eval_runtime": 0.5962, - "eval_samples_per_second": 285.148, - "eval_steps_per_second": 5.032, + "eval_LOCATION_f1": 0.6060606060606061, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.631578947368421, + "eval_LOCATION_recall": 0.5825242718446602, + "eval_ORGANIZATION_f1": 0.712401055408971, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.6490384615384616, + "eval_ORGANIZATION_recall": 0.7894736842105263, + "eval_PERSON_f1": 0.767605633802817, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.7124183006535948, + "eval_PERSON_recall": 0.8320610687022901, + "eval_loss": 0.20057322084903717, + "eval_overall_accuracy": 0.9489091411212373, + "eval_overall_f1": 0.7061556329849014, + "eval_overall_precision": 0.6666666666666666, + "eval_overall_recall": 0.7506172839506173, + "eval_runtime": 0.2738, + "eval_samples_per_second": 620.929, + "eval_steps_per_second": 10.958, "step": 576 }, { "epoch": 7.0, - "grad_norm": 1.2653672695159912, + "grad_norm": 1.1348835229873657, "learning_rate": 4.6500000000000005e-05, - "loss": 0.1962, + "loss": 0.2115, "step": 672 }, { "epoch": 7.0, - "eval_LOCATION_f1": 0.7551020408163266, - "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.7956989247311828, - "eval_LOCATION_recall": 0.7184466019417476, - "eval_ORGANIZATION_f1": 0.7526881720430108, - "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.6965174129353234, - "eval_ORGANIZATION_recall": 0.8187134502923976, - "eval_PERSON_f1": 0.9157509157509158, - "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.8802816901408451, - "eval_PERSON_recall": 0.9541984732824428, - "eval_loss": 0.1533629298210144, - "eval_overall_accuracy": 0.9605081469207402, - "eval_overall_f1": 0.8061831153388822, - "eval_overall_precision": 0.7775229357798165, - "eval_overall_recall": 0.837037037037037, - "eval_runtime": 0.5884, - "eval_samples_per_second": 288.903, - "eval_steps_per_second": 5.098, + "eval_LOCATION_f1": 0.712871287128713, + "eval_LOCATION_number": 103, + "eval_LOCATION_precision": 0.7272727272727273, + "eval_LOCATION_recall": 0.6990291262135923, + "eval_ORGANIZATION_f1": 0.7540106951871658, + "eval_ORGANIZATION_number": 171, + "eval_ORGANIZATION_precision": 0.6945812807881774, + "eval_ORGANIZATION_recall": 0.8245614035087719, + "eval_PERSON_f1": 0.8945454545454545, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.8541666666666666, + "eval_PERSON_recall": 0.9389312977099237, + "eval_loss": 0.1648775041103363, + "eval_overall_accuracy": 0.9585749792874897, + "eval_overall_f1": 0.7896592244418331, + "eval_overall_precision": 0.7533632286995515, + "eval_overall_recall": 0.8296296296296296, + "eval_runtime": 0.2742, + "eval_samples_per_second": 620.066, + "eval_steps_per_second": 10.942, "step": 672 }, { "epoch": 8.0, - "grad_norm": 0.9401953816413879, + "grad_norm": 0.5443127751350403, "learning_rate": 4.600000000000001e-05, - "loss": 0.1659, + "loss": 0.1785, "step": 768 }, { "epoch": 8.0, - "eval_LOCATION_f1": 0.7980295566502462, + "eval_LOCATION_f1": 0.797979797979798, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.81, - "eval_LOCATION_recall": 0.7864077669902912, - "eval_ORGANIZATION_f1": 0.7899159663865545, + "eval_LOCATION_precision": 0.8315789473684211, + "eval_LOCATION_recall": 0.7669902912621359, + "eval_ORGANIZATION_f1": 0.7912087912087912, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.7580645161290323, - "eval_ORGANIZATION_recall": 0.8245614035087719, - "eval_PERSON_f1": 0.9328358208955224, + "eval_ORGANIZATION_precision": 0.7461139896373057, + "eval_ORGANIZATION_recall": 0.8421052631578947, + "eval_PERSON_f1": 0.929889298892989, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9124087591240876, - "eval_PERSON_recall": 0.9541984732824428, - "eval_loss": 0.12766310572624207, + "eval_PERSON_precision": 0.9, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.1342812180519104, "eval_overall_accuracy": 0.9652029826014913, - "eval_overall_f1": 0.8381642512077295, - "eval_overall_precision": 0.8203309692671394, - "eval_overall_recall": 0.8567901234567902, - "eval_runtime": 0.5896, - "eval_samples_per_second": 288.354, - "eval_steps_per_second": 5.089, + "eval_overall_f1": 0.8379351740696278, + "eval_overall_precision": 0.8154205607476636, + "eval_overall_recall": 0.8617283950617284, + "eval_runtime": 0.2747, + "eval_samples_per_second": 618.771, + "eval_steps_per_second": 10.919, "step": 768 }, { "epoch": 9.0, - "grad_norm": 1.0895642042160034, + "grad_norm": 0.889248788356781, "learning_rate": 4.55e-05, - "loss": 0.1495, + "loss": 0.1541, "step": 864 }, { "epoch": 9.0, - "eval_LOCATION_f1": 0.835820895522388, + "eval_LOCATION_f1": 0.8217821782178217, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8571428571428571, - "eval_LOCATION_recall": 0.8155339805825242, - "eval_ORGANIZATION_f1": 0.8099173553719009, + "eval_LOCATION_precision": 0.8383838383838383, + "eval_LOCATION_recall": 0.8058252427184466, + "eval_ORGANIZATION_f1": 0.814404432132964, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.765625, + "eval_ORGANIZATION_precision": 0.7736842105263158, "eval_ORGANIZATION_recall": 0.8596491228070176, - "eval_PERSON_f1": 0.9338235294117646, - "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.900709219858156, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.11188717931509018, - "eval_overall_accuracy": 0.9690693178679922, - "eval_overall_f1": 0.8564593301435406, - "eval_overall_precision": 0.8306264501160093, - "eval_overall_recall": 0.8839506172839506, - "eval_runtime": 0.6012, - "eval_samples_per_second": 282.766, - "eval_steps_per_second": 4.99, + "eval_PERSON_f1": 0.9264705882352942, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.8936170212765957, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.11753819137811661, + "eval_overall_accuracy": 0.968240817453742, + "eval_overall_f1": 0.852694610778443, + "eval_overall_precision": 0.827906976744186, + "eval_overall_recall": 0.8790123456790123, + "eval_runtime": 0.2808, + "eval_samples_per_second": 605.329, + "eval_steps_per_second": 10.682, "step": 864 }, { "epoch": 10.0, - "grad_norm": 1.217275857925415, + "grad_norm": 0.8680849075317383, "learning_rate": 4.5e-05, - "loss": 0.1342, + "loss": 0.1387, "step": 960 }, { "epoch": 10.0, - "eval_LOCATION_f1": 0.8374384236453202, + "eval_LOCATION_f1": 0.8195121951219512, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.85, - "eval_LOCATION_recall": 0.8252427184466019, - "eval_ORGANIZATION_f1": 0.8277777777777778, + "eval_LOCATION_precision": 0.8235294117647058, + "eval_LOCATION_recall": 0.8155339805825242, + "eval_ORGANIZATION_f1": 0.8287292817679557, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.7883597883597884, - "eval_ORGANIZATION_recall": 0.8713450292397661, - "eval_PERSON_f1": 0.929889298892989, - "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9, - "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.10086647421121597, - "eval_overall_accuracy": 0.9707263186964927, - "eval_overall_f1": 0.8633093525179856, - "eval_overall_precision": 0.8391608391608392, - "eval_overall_recall": 0.8888888888888888, - "eval_runtime": 0.6119, - "eval_samples_per_second": 277.843, - "eval_steps_per_second": 4.903, + "eval_ORGANIZATION_precision": 0.7853403141361257, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9304029304029305, + "eval_PERSON_number": 131, + "eval_PERSON_precision": 0.8943661971830986, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.10947263240814209, + "eval_overall_accuracy": 0.9696216514774924, + "eval_overall_f1": 0.8595238095238096, + "eval_overall_precision": 0.8298850574712644, + "eval_overall_recall": 0.891358024691358, + "eval_runtime": 0.2731, + "eval_samples_per_second": 622.547, + "eval_steps_per_second": 10.986, "step": 960 }, { "epoch": 11.0, - "grad_norm": 2.0349202156066895, + "grad_norm": 0.9291552901268005, "learning_rate": 4.4500000000000004e-05, - "loss": 0.1241, + "loss": 0.1275, "step": 1056 }, { "epoch": 11.0, - "eval_LOCATION_f1": 0.8365384615384616, + "eval_LOCATION_f1": 0.8374384236453202, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8285714285714286, - "eval_LOCATION_recall": 0.8446601941747572, - "eval_ORGANIZATION_f1": 0.8342857142857143, + "eval_LOCATION_precision": 0.85, + "eval_LOCATION_recall": 0.8252427184466019, + "eval_ORGANIZATION_f1": 0.8333333333333334, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8156424581005587, - "eval_ORGANIZATION_recall": 0.8538011695906432, - "eval_PERSON_f1": 0.9438202247191011, + "eval_ORGANIZATION_precision": 0.7936507936507936, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.929889298892989, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9264705882352942, + "eval_PERSON_precision": 0.9, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.09149681031703949, - "eval_overall_accuracy": 0.9726594863297432, - "eval_overall_f1": 0.8703030303030304, - "eval_overall_precision": 0.8547619047619047, - "eval_overall_recall": 0.8864197530864197, - "eval_runtime": 0.5981, - "eval_samples_per_second": 284.231, - "eval_steps_per_second": 5.016, + "eval_loss": 0.09946061670780182, + "eval_overall_accuracy": 0.9710024855012428, + "eval_overall_f1": 0.8657074340527577, + "eval_overall_precision": 0.8414918414918415, + "eval_overall_recall": 0.891358024691358, + "eval_runtime": 0.277, + "eval_samples_per_second": 613.762, + "eval_steps_per_second": 10.831, "step": 1056 }, { "epoch": 12.0, - "grad_norm": 3.014051914215088, + "grad_norm": 0.6819791793823242, "learning_rate": 4.4000000000000006e-05, - "loss": 0.1188, + "loss": 0.1212, "step": 1152 }, { "epoch": 12.0, - "eval_LOCATION_f1": 0.8309178743961353, + "eval_LOCATION_f1": 0.8640776699029126, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8269230769230769, - "eval_LOCATION_recall": 0.8349514563106796, - "eval_ORGANIZATION_f1": 0.8579545454545455, + "eval_LOCATION_precision": 0.8640776699029126, + "eval_LOCATION_recall": 0.8640776699029126, + "eval_ORGANIZATION_f1": 0.837465564738292, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8342541436464088, - "eval_ORGANIZATION_recall": 0.8830409356725146, - "eval_PERSON_f1": 0.9402985074626866, + "eval_ORGANIZATION_precision": 0.7916666666666666, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.929889298892989, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9197080291970803, + "eval_PERSON_precision": 0.9, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.08539355546236038, - "eval_overall_accuracy": 0.9745926539629937, - "eval_overall_f1": 0.8778718258766627, - "eval_overall_precision": 0.8601895734597157, - "eval_overall_recall": 0.8962962962962963, - "eval_runtime": 0.5994, - "eval_samples_per_second": 283.636, - "eval_steps_per_second": 5.005, + "eval_loss": 0.09351213276386261, + "eval_overall_accuracy": 0.9723833195249931, + "eval_overall_f1": 0.8738095238095238, + "eval_overall_precision": 0.8436781609195402, + "eval_overall_recall": 0.9061728395061729, + "eval_runtime": 0.2767, + "eval_samples_per_second": 614.42, + "eval_steps_per_second": 10.843, "step": 1152 }, { "epoch": 13.0, - "grad_norm": 2.8539180755615234, + "grad_norm": 1.4507771730422974, "learning_rate": 4.35e-05, - "loss": 0.1102, + "loss": 0.1164, "step": 1248 }, { "epoch": 13.0, - "eval_LOCATION_f1": 0.8695652173913043, + "eval_LOCATION_f1": 0.8585365853658536, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8653846153846154, - "eval_LOCATION_recall": 0.8737864077669902, - "eval_ORGANIZATION_f1": 0.866096866096866, + "eval_LOCATION_precision": 0.8627450980392157, + "eval_LOCATION_recall": 0.8543689320388349, + "eval_ORGANIZATION_f1": 0.845303867403315, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8444444444444444, - "eval_ORGANIZATION_recall": 0.8888888888888888, - "eval_PERSON_f1": 0.9473684210526315, + "eval_ORGANIZATION_precision": 0.8010471204188482, + "eval_ORGANIZATION_recall": 0.8947368421052632, + "eval_PERSON_f1": 0.929889298892989, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9333333333333333, + "eval_PERSON_precision": 0.9, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.07981602847576141, - "eval_overall_accuracy": 0.9762496547914941, - "eval_overall_f1": 0.8932038834951457, - "eval_overall_precision": 0.8782816229116945, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.598, - "eval_samples_per_second": 284.265, - "eval_steps_per_second": 5.016, + "eval_loss": 0.08751274645328522, + "eval_overall_accuracy": 0.9723833195249931, + "eval_overall_f1": 0.8758949880668259, + "eval_overall_precision": 0.8475750577367206, + "eval_overall_recall": 0.9061728395061729, + "eval_runtime": 0.2748, + "eval_samples_per_second": 618.687, + "eval_steps_per_second": 10.918, "step": 1248 }, { "epoch": 14.0, - "grad_norm": 0.624473512172699, + "grad_norm": 0.5297168493270874, "learning_rate": 4.3e-05, - "loss": 0.1044, + "loss": 0.1105, "step": 1344 }, { "epoch": 14.0, - "eval_LOCATION_f1": 0.8768472906403941, + "eval_LOCATION_f1": 0.8878048780487806, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.89, - "eval_LOCATION_recall": 0.8640776699029126, - "eval_ORGANIZATION_f1": 0.8830409356725146, + "eval_LOCATION_precision": 0.8921568627450981, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8587896253602305, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8830409356725146, - "eval_ORGANIZATION_recall": 0.8830409356725146, - "eval_PERSON_f1": 0.9509433962264152, + "eval_ORGANIZATION_precision": 0.8465909090909091, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9438202247191011, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_precision": 0.9264705882352942, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.07802519202232361, - "eval_overall_accuracy": 0.9781828224247445, - "eval_overall_f1": 0.9037037037037037, - "eval_overall_precision": 0.9037037037037037, + "eval_loss": 0.08204468339681625, + "eval_overall_accuracy": 0.9768019884009942, + "eval_overall_f1": 0.8937728937728937, + "eval_overall_precision": 0.8840579710144928, "eval_overall_recall": 0.9037037037037037, - "eval_runtime": 0.5951, - "eval_samples_per_second": 285.656, - "eval_steps_per_second": 5.041, + "eval_runtime": 0.2712, + "eval_samples_per_second": 626.816, + "eval_steps_per_second": 11.061, "step": 1344 }, { "epoch": 15.0, - "grad_norm": 0.996662437915802, + "grad_norm": 0.8479740023612976, "learning_rate": 4.25e-05, - "loss": 0.1009, + "loss": 0.1063, "step": 1440 }, { "epoch": 15.0, - "eval_LOCATION_f1": 0.891089108910891, + "eval_LOCATION_f1": 0.89, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.9090909090909091, - "eval_LOCATION_recall": 0.8737864077669902, - "eval_ORGANIZATION_f1": 0.8788732394366197, + "eval_LOCATION_precision": 0.9175257731958762, + "eval_LOCATION_recall": 0.8640776699029126, + "eval_ORGANIZATION_f1": 0.8446866485013624, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8478260869565217, - "eval_ORGANIZATION_recall": 0.9122807017543859, - "eval_PERSON_f1": 0.9584905660377357, + "eval_ORGANIZATION_precision": 0.7908163265306123, + "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_PERSON_f1": 0.9333333333333332, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9477611940298507, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.07211567461490631, - "eval_overall_accuracy": 0.9781828224247445, - "eval_overall_f1": 0.9075425790754258, - "eval_overall_precision": 0.894484412470024, - "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.5925, - "eval_samples_per_second": 286.903, - "eval_steps_per_second": 5.063, + "eval_PERSON_precision": 0.9064748201438849, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.07925137132406235, + "eval_overall_accuracy": 0.9751449875724938, + "eval_overall_f1": 0.8841099163679809, + "eval_overall_precision": 0.8564814814814815, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.2741, + "eval_samples_per_second": 620.179, + "eval_steps_per_second": 10.944, "step": 1440 }, { "epoch": 16.0, - "grad_norm": 1.0493874549865723, + "grad_norm": 1.6623704433441162, "learning_rate": 4.2e-05, - "loss": 0.0978, + "loss": 0.1018, "step": 1536 }, { "epoch": 16.0, - "eval_LOCATION_f1": 0.8932038834951457, + "eval_LOCATION_f1": 0.8846153846153847, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_precision": 0.8761904761904762, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8725212464589235, + "eval_ORGANIZATION_f1": 0.8555858310626703, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8461538461538461, - "eval_ORGANIZATION_recall": 0.9005847953216374, - "eval_PERSON_f1": 0.9509433962264152, + "eval_ORGANIZATION_precision": 0.8010204081632653, + "eval_ORGANIZATION_recall": 0.9181286549707602, + "eval_PERSON_f1": 0.9333333333333332, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_precision": 0.9064748201438849, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.06955698877573013, - "eval_overall_accuracy": 0.9779066556199945, - "eval_overall_f1": 0.9029126213592233, - "eval_overall_precision": 0.8878281622911695, - "eval_overall_recall": 0.9185185185185185, - "eval_runtime": 0.6569, - "eval_samples_per_second": 258.806, - "eval_steps_per_second": 4.567, + "eval_loss": 0.07826149463653564, + "eval_overall_accuracy": 0.9748688207677437, + "eval_overall_f1": 0.8875739644970415, + "eval_overall_precision": 0.8522727272727273, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2734, + "eval_samples_per_second": 621.692, + "eval_steps_per_second": 10.971, "step": 1536 }, { "epoch": 17.0, - "grad_norm": 0.8617897629737854, + "grad_norm": 0.8166274428367615, "learning_rate": 4.15e-05, - "loss": 0.0962, + "loss": 0.0986, "step": 1632 }, { "epoch": 17.0, - "eval_LOCATION_f1": 0.9064039408866995, + "eval_LOCATION_f1": 0.9019607843137256, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.92, + "eval_LOCATION_precision": 0.9108910891089109, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8869565217391304, + "eval_ORGANIZATION_f1": 0.8627450980392157, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8793103448275862, - "eval_ORGANIZATION_recall": 0.8947368421052632, - "eval_PERSON_f1": 0.962121212121212, + "eval_ORGANIZATION_precision": 0.8279569892473119, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.9548872180451129, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9548872180451128, + "eval_PERSON_precision": 0.9407407407407408, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.06796535849571228, - "eval_overall_accuracy": 0.980115990057995, - "eval_overall_f1": 0.916256157635468, - "eval_overall_precision": 0.914004914004914, - "eval_overall_recall": 0.9185185185185185, - "eval_runtime": 0.6013, - "eval_samples_per_second": 282.726, - "eval_steps_per_second": 4.989, + "eval_loss": 0.07254834473133087, + "eval_overall_accuracy": 0.9779066556199945, + "eval_overall_f1": 0.9020556227327691, + "eval_overall_precision": 0.8838862559241706, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.274, + "eval_samples_per_second": 620.343, + "eval_steps_per_second": 10.947, "step": 1632 }, { "epoch": 18.0, - "grad_norm": 0.8590739965438843, + "grad_norm": 0.7182817459106445, "learning_rate": 4.1e-05, - "loss": 0.0909, + "loss": 0.093, "step": 1728 }, { "epoch": 18.0, - "eval_LOCATION_f1": 0.896551724137931, + "eval_LOCATION_f1": 0.8921568627450982, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.91, + "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.8831908831908832, + "eval_ORGANIZATION_f1": 0.8764044943820224, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8611111111111112, - "eval_ORGANIZATION_recall": 0.9064327485380117, - "eval_PERSON_f1": 0.9509433962264152, + "eval_ORGANIZATION_precision": 0.8432432432432433, + "eval_ORGANIZATION_recall": 0.9122807017543859, + "eval_PERSON_f1": 0.9473684210526315, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_precision": 0.9333333333333333, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.0644010454416275, - "eval_overall_accuracy": 0.9798398232532449, - "eval_overall_f1": 0.9084249084249084, - "eval_overall_precision": 0.8985507246376812, - "eval_overall_recall": 0.9185185185185185, - "eval_runtime": 0.5942, - "eval_samples_per_second": 286.117, - "eval_steps_per_second": 5.049, + "eval_loss": 0.06929948180913925, + "eval_overall_accuracy": 0.9779066556199945, + "eval_overall_f1": 0.9031476997578692, + "eval_overall_precision": 0.8859857482185273, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2726, + "eval_samples_per_second": 623.704, + "eval_steps_per_second": 11.007, "step": 1728 }, { "epoch": 19.0, - "grad_norm": 2.493396520614624, + "grad_norm": 1.4325016736984253, "learning_rate": 4.05e-05, - "loss": 0.088, + "loss": 0.0897, "step": 1824 }, { "epoch": 19.0, - "eval_LOCATION_f1": 0.9009900990099011, + "eval_LOCATION_f1": 0.8846153846153847, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.9191919191919192, - "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.8908045977011494, + "eval_LOCATION_precision": 0.8761904761904762, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8757062146892655, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8757062146892656, + "eval_ORGANIZATION_precision": 0.8469945355191257, "eval_ORGANIZATION_recall": 0.9064327485380117, - "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_f1": 0.9477611940298507, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9473684210526315, - "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.0634172335267067, - "eval_overall_accuracy": 0.9806683236674951, - "eval_overall_f1": 0.914004914004914, - "eval_overall_precision": 0.9095354523227384, - "eval_overall_recall": 0.9185185185185185, - "eval_runtime": 0.5943, - "eval_samples_per_second": 286.044, - "eval_steps_per_second": 5.048, + "eval_PERSON_precision": 0.927007299270073, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.06990908831357956, + "eval_overall_accuracy": 0.9781828224247445, + "eval_overall_f1": 0.9012048192771085, + "eval_overall_precision": 0.88, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2715, + "eval_samples_per_second": 626.106, + "eval_steps_per_second": 11.049, "step": 1824 }, { "epoch": 20.0, - "grad_norm": 0.5532075762748718, + "grad_norm": 0.7544979453086853, "learning_rate": 4e-05, - "loss": 0.0836, + "loss": 0.0876, "step": 1920 }, { @@ -610,153 +610,153 @@ "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8846153846153846, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8863636363636364, + "eval_ORGANIZATION_f1": 0.8611111111111113, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.861878453038674, - "eval_ORGANIZATION_recall": 0.9122807017543859, - "eval_PERSON_f1": 0.9509433962264152, + "eval_ORGANIZATION_precision": 0.8201058201058201, + "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_PERSON_f1": 0.9333333333333332, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_precision": 0.9064748201438849, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.06357116252183914, - "eval_overall_accuracy": 0.9798398232532449, - "eval_overall_f1": 0.9077669902912622, - "eval_overall_precision": 0.8926014319809069, - "eval_overall_recall": 0.9234567901234568, - "eval_runtime": 0.5958, - "eval_samples_per_second": 285.346, - "eval_steps_per_second": 5.036, + "eval_loss": 0.06785619258880615, + "eval_overall_accuracy": 0.9765258215962441, + "eval_overall_f1": 0.8912783751493429, + "eval_overall_precision": 0.8634259259259259, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2728, + "eval_samples_per_second": 623.147, + "eval_steps_per_second": 10.997, "step": 1920 }, { "epoch": 21.0, - "grad_norm": 1.6820136308670044, + "grad_norm": 0.9009093046188354, "learning_rate": 3.9500000000000005e-05, - "loss": 0.0836, + "loss": 0.0846, "step": 2016 }, { "epoch": 21.0, - "eval_LOCATION_f1": 0.8888888888888888, + "eval_LOCATION_f1": 0.8803827751196173, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8846153846153846, + "eval_LOCATION_precision": 0.8679245283018868, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8760806916426512, + "eval_ORGANIZATION_f1": 0.8707865168539327, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8636363636363636, - "eval_ORGANIZATION_recall": 0.8888888888888888, - "eval_PERSON_f1": 0.9545454545454546, + "eval_ORGANIZATION_precision": 0.8378378378378378, + "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_PERSON_f1": 0.9402985074626866, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_precision": 0.9197080291970803, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.05993271991610527, - "eval_overall_accuracy": 0.9806683236674951, - "eval_overall_f1": 0.9046454767726161, - "eval_overall_precision": 0.8958837772397095, - "eval_overall_recall": 0.9135802469135802, - "eval_runtime": 0.5978, - "eval_samples_per_second": 284.362, - "eval_steps_per_second": 5.018, + "eval_loss": 0.06543691456317902, + "eval_overall_accuracy": 0.9784589892294946, + "eval_overall_f1": 0.8955582232893157, + "eval_overall_precision": 0.8714953271028038, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2743, + "eval_samples_per_second": 619.702, + "eval_steps_per_second": 10.936, "step": 2016 }, { "epoch": 22.0, - "grad_norm": 0.9439449906349182, + "grad_norm": 1.1362422704696655, "learning_rate": 3.9000000000000006e-05, - "loss": 0.0827, + "loss": 0.0843, "step": 2112 }, { "epoch": 22.0, - "eval_LOCATION_f1": 0.8761904761904761, + "eval_LOCATION_f1": 0.8932038834951457, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8598130841121495, + "eval_LOCATION_precision": 0.8932038834951457, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8757062146892655, + "eval_ORGANIZATION_f1": 0.8784530386740331, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8469945355191257, - "eval_ORGANIZATION_recall": 0.9064327485380117, - "eval_PERSON_f1": 0.9509433962264152, + "eval_ORGANIZATION_precision": 0.8324607329842932, + "eval_ORGANIZATION_recall": 0.9298245614035088, + "eval_PERSON_f1": 0.9402985074626866, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_precision": 0.9197080291970803, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.05957403779029846, - "eval_overall_accuracy": 0.9798398232532449, - "eval_overall_f1": 0.8998793727382389, - "eval_overall_precision": 0.8797169811320755, - "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.5967, - "eval_samples_per_second": 284.901, - "eval_steps_per_second": 5.028, + "eval_loss": 0.06642135977745056, + "eval_overall_accuracy": 0.9787351560342447, + "eval_overall_f1": 0.9019138755980862, + "eval_overall_precision": 0.8747099767981439, + "eval_overall_recall": 0.9308641975308642, + "eval_runtime": 0.2703, + "eval_samples_per_second": 628.899, + "eval_steps_per_second": 11.098, "step": 2112 }, { "epoch": 23.0, - "grad_norm": 1.140651822090149, + "grad_norm": 1.139490008354187, "learning_rate": 3.85e-05, - "loss": 0.077, + "loss": 0.0823, "step": 2208 }, { "epoch": 23.0, - "eval_LOCATION_f1": 0.8750000000000001, + "eval_LOCATION_f1": 0.8803827751196173, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8666666666666667, - "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.8818443804034583, + "eval_LOCATION_precision": 0.8679245283018868, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8685714285714285, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8693181818181818, - "eval_ORGANIZATION_recall": 0.8947368421052632, + "eval_ORGANIZATION_precision": 0.8491620111731844, + "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.058286890387535095, - "eval_overall_accuracy": 0.9806683236674951, - "eval_overall_f1": 0.9070904645476773, - "eval_overall_precision": 0.8983050847457628, + "eval_loss": 0.06113022193312645, + "eval_overall_accuracy": 0.980115990057995, + "eval_overall_f1": 0.902676399026764, + "eval_overall_precision": 0.8896882494004796, "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.6001, - "eval_samples_per_second": 283.301, - "eval_steps_per_second": 4.999, + "eval_runtime": 0.2712, + "eval_samples_per_second": 626.885, + "eval_steps_per_second": 11.063, "step": 2208 }, { "epoch": 24.0, - "grad_norm": 0.9434022307395935, + "grad_norm": 0.9067116975784302, "learning_rate": 3.8e-05, - "loss": 0.0774, + "loss": 0.0808, "step": 2304 }, { "epoch": 24.0, - "eval_LOCATION_f1": 0.861111111111111, + "eval_LOCATION_f1": 0.8666666666666667, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8230088495575221, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8529411764705882, + "eval_LOCATION_precision": 0.8504672897196262, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8700564971751413, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8579881656804734, - "eval_ORGANIZATION_recall": 0.847953216374269, - "eval_PERSON_f1": 0.9545454545454546, + "eval_ORGANIZATION_precision": 0.8415300546448088, + "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9473684210526315, - "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.05800532177090645, - "eval_overall_accuracy": 0.9787351560342447, - "eval_overall_f1": 0.8878048780487805, - "eval_overall_precision": 0.8771084337349397, - "eval_overall_recall": 0.8987654320987655, - "eval_runtime": 0.6032, - "eval_samples_per_second": 281.813, - "eval_steps_per_second": 4.973, + "eval_PERSON_precision": 0.9548872180451128, + "eval_PERSON_recall": 0.9694656488549618, + "eval_loss": 0.0627431645989418, + "eval_overall_accuracy": 0.9798398232532449, + "eval_overall_f1": 0.8985507246376813, + "eval_overall_precision": 0.8794326241134752, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.2724, + "eval_samples_per_second": 624.125, + "eval_steps_per_second": 11.014, "step": 2304 }, { "epoch": 25.0, - "grad_norm": 1.0291742086410522, + "grad_norm": 1.520267128944397, "learning_rate": 3.7500000000000003e-05, - "loss": 0.0782, + "loss": 0.0809, "step": 2400 }, { @@ -765,60 +765,60 @@ "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.8816568047337278, + "eval_ORGANIZATION_f1": 0.8732394366197183, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8922155688622755, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.842391304347826, + "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.05705862119793892, - "eval_overall_accuracy": 0.9798398232532449, - "eval_overall_f1": 0.9048207663782447, - "eval_overall_precision": 0.905940594059406, - "eval_overall_recall": 0.9037037037037037, - "eval_runtime": 0.6018, - "eval_samples_per_second": 282.474, - "eval_steps_per_second": 4.985, + "eval_loss": 0.05983828753232956, + "eval_overall_accuracy": 0.9806683236674951, + "eval_overall_f1": 0.9007263922518159, + "eval_overall_precision": 0.8836104513064132, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.272, + "eval_samples_per_second": 624.952, + "eval_steps_per_second": 11.029, "step": 2400 }, { "epoch": 26.0, - "grad_norm": 0.6928281784057617, + "grad_norm": 0.6365208029747009, "learning_rate": 3.7e-05, - "loss": 0.0771, + "loss": 0.078, "step": 2496 }, { "epoch": 26.0, - "eval_LOCATION_f1": 0.8975609756097561, + "eval_LOCATION_f1": 0.8921568627450982, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.9019607843137255, - "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8977272727272728, + "eval_LOCATION_precision": 0.900990099009901, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8795518207282913, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8729281767955801, - "eval_ORGANIZATION_recall": 0.9239766081871345, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8440860215053764, + "eval_ORGANIZATION_recall": 0.9181286549707602, + "eval_PERSON_f1": 0.9509433962264152, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.055970560759305954, - "eval_overall_accuracy": 0.9820491576912456, - "eval_overall_f1": 0.9195121951219513, - "eval_overall_precision": 0.908433734939759, - "eval_overall_recall": 0.9308641975308642, - "eval_runtime": 0.6049, - "eval_samples_per_second": 281.02, - "eval_steps_per_second": 4.959, + "eval_PERSON_precision": 0.9402985074626866, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.058052726089954376, + "eval_overall_accuracy": 0.9817729908864954, + "eval_overall_f1": 0.9055690072639224, + "eval_overall_precision": 0.8883610451306413, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.273, + "eval_samples_per_second": 622.679, + "eval_steps_per_second": 10.988, "step": 2496 }, { "epoch": 27.0, - "grad_norm": 1.4192240238189697, + "grad_norm": 1.504185676574707, "learning_rate": 3.65e-05, - "loss": 0.0726, + "loss": 0.0774, "step": 2592 }, { @@ -827,215 +827,215 @@ "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.92, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.9008498583569404, + "eval_ORGANIZATION_f1": 0.888268156424581, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8736263736263736, + "eval_ORGANIZATION_precision": 0.8502673796791443, "eval_ORGANIZATION_recall": 0.9298245614035088, - "eval_PERSON_f1": 0.9545454545454546, + "eval_PERSON_f1": 0.9509433962264152, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_precision": 0.9402985074626866, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.05344228819012642, - "eval_overall_accuracy": 0.9826014913007457, - "eval_overall_f1": 0.9195121951219513, - "eval_overall_precision": 0.908433734939759, + "eval_loss": 0.058188218623399734, + "eval_overall_accuracy": 0.9812206572769953, + "eval_overall_f1": 0.9128329297820823, + "eval_overall_precision": 0.8954869358669834, "eval_overall_recall": 0.9308641975308642, - "eval_runtime": 0.605, - "eval_samples_per_second": 281.0, - "eval_steps_per_second": 4.959, + "eval_runtime": 0.2726, + "eval_samples_per_second": 623.662, + "eval_steps_per_second": 11.006, "step": 2592 }, { "epoch": 28.0, - "grad_norm": 1.0541571378707886, + "grad_norm": 0.8313342928886414, "learning_rate": 3.6e-05, - "loss": 0.0712, + "loss": 0.0732, "step": 2688 }, { "epoch": 28.0, - "eval_LOCATION_f1": 0.8932038834951457, + "eval_LOCATION_f1": 0.8975609756097561, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_precision": 0.9019607843137255, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8939828080229225, + "eval_ORGANIZATION_f1": 0.8857142857142858, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8764044943820225, - "eval_ORGANIZATION_recall": 0.9122807017543859, + "eval_ORGANIZATION_precision": 0.8659217877094972, + "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.05282684788107872, - "eval_overall_accuracy": 0.9826014913007457, - "eval_overall_f1": 0.9133089133089134, - "eval_overall_precision": 0.9033816425120773, - "eval_overall_recall": 0.9234567901234568, - "eval_runtime": 0.6016, - "eval_samples_per_second": 282.587, - "eval_steps_per_second": 4.987, + "eval_loss": 0.062320366501808167, + "eval_overall_accuracy": 0.9814968240817453, + "eval_overall_f1": 0.9108669108669109, + "eval_overall_precision": 0.9009661835748792, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.273, + "eval_samples_per_second": 622.639, + "eval_steps_per_second": 10.988, "step": 2688 }, { "epoch": 29.0, - "grad_norm": 0.6951781511306763, + "grad_norm": 0.6814190149307251, "learning_rate": 3.55e-05, - "loss": 0.0703, + "loss": 0.0746, "step": 2784 }, { "epoch": 29.0, - "eval_LOCATION_f1": 0.9009900990099011, + "eval_LOCATION_f1": 0.9019607843137256, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.9191919191919192, - "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.9008498583569404, + "eval_LOCATION_precision": 0.9108910891089109, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.9028571428571428, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8736263736263736, - "eval_ORGANIZATION_recall": 0.9298245614035088, + "eval_ORGANIZATION_precision": 0.88268156424581, + "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.05054891109466553, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.9217603911980441, - "eval_overall_precision": 0.9128329297820823, + "eval_loss": 0.05526537075638771, + "eval_overall_accuracy": 0.9828776581054958, + "eval_overall_f1": 0.922888616891065, + "eval_overall_precision": 0.9150485436893204, "eval_overall_recall": 0.9308641975308642, - "eval_runtime": 0.5939, - "eval_samples_per_second": 286.258, - "eval_steps_per_second": 5.052, + "eval_runtime": 0.2751, + "eval_samples_per_second": 617.869, + "eval_steps_per_second": 10.904, "step": 2784 }, { "epoch": 30.0, - "grad_norm": 0.4239547550678253, + "grad_norm": 1.8307719230651855, "learning_rate": 3.5e-05, - "loss": 0.0649, + "loss": 0.0695, "step": 2880 }, { "epoch": 30.0, - "eval_LOCATION_f1": 0.9054726368159205, + "eval_LOCATION_f1": 0.9019607843137256, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.9285714285714286, - "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.9065155807365438, + "eval_LOCATION_precision": 0.9108910891089109, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.9028571428571428, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8791208791208791, - "eval_ORGANIZATION_recall": 0.935672514619883, + "eval_ORGANIZATION_precision": 0.88268156424581, + "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.05008954927325249, - "eval_overall_accuracy": 0.983706158519746, - "eval_overall_f1": 0.9217603911980441, - "eval_overall_precision": 0.9128329297820823, - "eval_overall_recall": 0.9308641975308642, - "eval_runtime": 0.606, - "eval_samples_per_second": 280.539, - "eval_steps_per_second": 4.951, + "eval_loss": 0.05361052602529526, + "eval_overall_accuracy": 0.9831538249102458, + "eval_overall_f1": 0.9193154034229828, + "eval_overall_precision": 0.910411622276029, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.2712, + "eval_samples_per_second": 626.924, + "eval_steps_per_second": 11.063, "step": 2880 }, { "epoch": 31.0, - "grad_norm": 1.2522234916687012, + "grad_norm": 0.5288766026496887, "learning_rate": 3.45e-05, - "loss": 0.0642, + "loss": 0.0691, "step": 2976 }, { "epoch": 31.0, - "eval_LOCATION_f1": 0.8720379146919431, + "eval_LOCATION_f1": 0.8846153846153847, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8518518518518519, + "eval_LOCATION_precision": 0.8761904761904762, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8786127167630058, + "eval_ORGANIZATION_f1": 0.893371757925072, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8685714285714285, - "eval_ORGANIZATION_recall": 0.8888888888888888, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8806818181818182, + "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.05007793754339218, - "eval_overall_accuracy": 0.9820491576912456, - "eval_overall_f1": 0.9048780487804877, - "eval_overall_precision": 0.8939759036144578, - "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.5983, - "eval_samples_per_second": 284.148, - "eval_steps_per_second": 5.014, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05325145646929741, + "eval_overall_accuracy": 0.9826014913007457, + "eval_overall_f1": 0.9108669108669109, + "eval_overall_precision": 0.9009661835748792, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2729, + "eval_samples_per_second": 622.963, + "eval_steps_per_second": 10.993, "step": 2976 }, { "epoch": 32.0, - "grad_norm": 0.8345080018043518, + "grad_norm": 0.7085039019584656, "learning_rate": 3.4000000000000007e-05, - "loss": 0.0664, + "loss": 0.0665, "step": 3072 }, { "epoch": 32.0, - "eval_LOCATION_f1": 0.8625592417061612, + "eval_LOCATION_f1": 0.883495145631068, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8425925925925926, + "eval_LOCATION_precision": 0.883495145631068, "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.8835820895522388, + "eval_ORGANIZATION_f1": 0.8825214899713466, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.9024390243902439, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.8651685393258427, + "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, - "eval_loss": 0.05058050900697708, - "eval_overall_accuracy": 0.9817729908864954, - "eval_overall_f1": 0.9012345679012346, - "eval_overall_precision": 0.9012345679012346, - "eval_overall_recall": 0.9012345679012346, - "eval_runtime": 0.6023, - "eval_samples_per_second": 282.266, - "eval_steps_per_second": 4.981, + "eval_loss": 0.05177022144198418, + "eval_overall_accuracy": 0.9823253244959956, + "eval_overall_f1": 0.905982905982906, + "eval_overall_precision": 0.8961352657004831, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.2709, + "eval_samples_per_second": 627.553, + "eval_steps_per_second": 11.074, "step": 3072 }, { "epoch": 33.0, - "grad_norm": 0.7092554569244385, + "grad_norm": 0.6961383819580078, "learning_rate": 3.35e-05, - "loss": 0.0659, + "loss": 0.0649, "step": 3168 }, { "epoch": 33.0, - "eval_LOCATION_f1": 0.8584905660377358, + "eval_LOCATION_f1": 0.8598130841121495, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8348623853211009, - "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.8842729970326408, + "eval_LOCATION_precision": 0.8288288288288288, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8975903614457831, - "eval_ORGANIZATION_recall": 0.8713450292397661, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.901840490797546, + "eval_ORGANIZATION_recall": 0.8596491228070176, + "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.0509335920214653, - "eval_overall_accuracy": 0.9820491576912456, - "eval_overall_f1": 0.9039408866995073, - "eval_overall_precision": 0.9017199017199017, - "eval_overall_recall": 0.9061728395061729, - "eval_runtime": 0.5971, - "eval_samples_per_second": 284.704, - "eval_steps_per_second": 5.024, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.052676014602184296, + "eval_overall_accuracy": 0.9809444904722452, + "eval_overall_f1": 0.8990147783251232, + "eval_overall_precision": 0.8968058968058968, + "eval_overall_recall": 0.9012345679012346, + "eval_runtime": 0.273, + "eval_samples_per_second": 622.752, + "eval_steps_per_second": 10.99, "step": 3168 }, { "epoch": 34.0, - "grad_norm": 0.8459410071372986, + "grad_norm": 0.3737340569496155, "learning_rate": 3.3e-05, - "loss": 0.0667, + "loss": 0.0645, "step": 3264 }, { @@ -1044,277 +1044,277 @@ "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.883495145631068, "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.8908045977011494, + "eval_ORGANIZATION_f1": 0.9043478260869565, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8757062146892656, - "eval_ORGANIZATION_recall": 0.9064327485380117, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.896551724137931, + "eval_ORGANIZATION_recall": 0.9122807017543859, + "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.050714682787656784, - "eval_overall_accuracy": 0.9826014913007457, - "eval_overall_f1": 0.9130966952264382, - "eval_overall_precision": 0.9053398058252428, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.05056975781917572, + "eval_overall_accuracy": 0.9842584921292461, + "eval_overall_f1": 0.9153374233128834, + "eval_overall_precision": 0.9097560975609756, "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.5977, - "eval_samples_per_second": 284.432, - "eval_steps_per_second": 5.019, + "eval_runtime": 0.2748, + "eval_samples_per_second": 618.671, + "eval_steps_per_second": 10.918, "step": 3264 }, { "epoch": 35.0, - "grad_norm": 1.9158438444137573, + "grad_norm": 1.4867804050445557, "learning_rate": 3.2500000000000004e-05, - "loss": 0.0639, + "loss": 0.063, "step": 3360 }, { "epoch": 35.0, - "eval_LOCATION_f1": 0.8761904761904761, + "eval_LOCATION_f1": 0.8666666666666667, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8598130841121495, - "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8934911242603552, + "eval_LOCATION_precision": 0.8504672897196262, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8888888888888888, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.9041916167664671, - "eval_ORGANIZATION_recall": 0.8830409356725146, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8888888888888888, + "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.0509558841586113, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.051477495580911636, "eval_overall_accuracy": 0.9826014913007457, - "eval_overall_f1": 0.9124537607891491, - "eval_overall_precision": 0.9113300492610837, - "eval_overall_recall": 0.9135802469135802, - "eval_runtime": 0.6048, - "eval_samples_per_second": 281.07, - "eval_steps_per_second": 4.96, + "eval_overall_f1": 0.9044117647058824, + "eval_overall_precision": 0.8978102189781022, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.2727, + "eval_samples_per_second": 623.41, + "eval_steps_per_second": 11.001, "step": 3360 }, { "epoch": 36.0, - "grad_norm": 1.0001205205917358, + "grad_norm": 0.9459262490272522, "learning_rate": 3.2000000000000005e-05, - "loss": 0.0632, + "loss": 0.0637, "step": 3456 }, { "epoch": 36.0, - "eval_LOCATION_f1": 0.8846153846153847, + "eval_LOCATION_f1": 0.8666666666666667, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8761904761904762, - "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.9032258064516129, + "eval_LOCATION_precision": 0.8504672897196262, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8830409356725146, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.9058823529411765, - "eval_ORGANIZATION_recall": 0.9005847953216374, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8830409356725146, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.050493232905864716, - "eval_overall_accuracy": 0.983706158519746, - "eval_overall_f1": 0.91871921182266, - "eval_overall_precision": 0.9164619164619164, - "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.5977, - "eval_samples_per_second": 284.414, - "eval_steps_per_second": 5.019, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.050841640681028366, + "eval_overall_accuracy": 0.9817729908864954, + "eval_overall_f1": 0.9019607843137256, + "eval_overall_precision": 0.8953771289537713, + "eval_overall_recall": 0.908641975308642, + "eval_runtime": 0.2727, + "eval_samples_per_second": 623.352, + "eval_steps_per_second": 11.0, "step": 3456 }, { "epoch": 37.0, - "grad_norm": 2.2751259803771973, + "grad_norm": 2.0613455772399902, "learning_rate": 3.15e-05, - "loss": 0.0632, + "loss": 0.0614, "step": 3552 }, { "epoch": 37.0, - "eval_LOCATION_f1": 0.9054726368159205, + "eval_LOCATION_f1": 0.8921568627450982, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.9285714285714286, + "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.9132947976878613, + "eval_ORGANIZATION_f1": 0.8977272727272728, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.9028571428571428, + "eval_ORGANIZATION_precision": 0.8729281767955801, "eval_ORGANIZATION_recall": 0.9239766081871345, - "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04872192069888115, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.928395061728395, - "eval_overall_precision": 0.928395061728395, - "eval_overall_recall": 0.928395061728395, - "eval_runtime": 0.5916, - "eval_samples_per_second": 287.366, - "eval_steps_per_second": 5.071, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.049506776034832, + "eval_overall_accuracy": 0.9828776581054958, + "eval_overall_f1": 0.9146341463414634, + "eval_overall_precision": 0.9036144578313253, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2728, + "eval_samples_per_second": 623.161, + "eval_steps_per_second": 10.997, "step": 3552 }, { "epoch": 38.0, - "grad_norm": 0.6949302554130554, + "grad_norm": 0.5442711710929871, "learning_rate": 3.1e-05, - "loss": 0.0605, + "loss": 0.0599, "step": 3648 }, { "epoch": 38.0, - "eval_LOCATION_f1": 0.8803827751196173, + "eval_LOCATION_f1": 0.8708133971291866, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8679245283018868, - "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8922155688622756, + "eval_LOCATION_precision": 0.8584905660377359, + "eval_LOCATION_recall": 0.883495145631068, + "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.9141104294478528, - "eval_ORGANIZATION_recall": 0.8713450292397661, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8982035928143712, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, - "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.05016703903675079, - "eval_overall_accuracy": 0.9831538249102458, - "eval_overall_f1": 0.913151364764268, - "eval_overall_precision": 0.9177057356608479, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.5914, - "eval_samples_per_second": 287.442, - "eval_steps_per_second": 5.073, + "eval_PERSON_precision": 0.9473684210526315, + "eval_PERSON_recall": 0.9618320610687023, + "eval_loss": 0.04945772886276245, + "eval_overall_accuracy": 0.9820491576912456, + "eval_overall_f1": 0.9050554870530209, + "eval_overall_precision": 0.9039408866995073, + "eval_overall_recall": 0.9061728395061729, + "eval_runtime": 0.2707, + "eval_samples_per_second": 628.033, + "eval_steps_per_second": 11.083, "step": 3648 }, { "epoch": 39.0, - "grad_norm": 0.5452874302864075, + "grad_norm": 0.5759150385856628, "learning_rate": 3.05e-05, - "loss": 0.0609, + "loss": 0.06, "step": 3744 }, { "epoch": 39.0, - "eval_LOCATION_f1": 0.8803827751196173, + "eval_LOCATION_f1": 0.8720379146919431, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8679245283018868, + "eval_LOCATION_precision": 0.8518518518518519, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8953488372093024, + "eval_ORGANIZATION_f1": 0.877906976744186, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8901734104046243, - "eval_ORGANIZATION_recall": 0.9005847953216374, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8728323699421965, + "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.046954866498708725, - "eval_overall_accuracy": 0.9842584921292461, - "eval_overall_f1": 0.9142156862745098, - "eval_overall_precision": 0.9075425790754258, - "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.6085, - "eval_samples_per_second": 279.378, - "eval_steps_per_second": 4.93, + "eval_loss": 0.0495310053229332, + "eval_overall_accuracy": 0.9820491576912456, + "eval_overall_f1": 0.9057527539779682, + "eval_overall_precision": 0.8980582524271845, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.277, + "eval_samples_per_second": 613.735, + "eval_steps_per_second": 10.831, "step": 3744 }, { "epoch": 40.0, - "grad_norm": 2.737624406814575, + "grad_norm": 0.95399409532547, "learning_rate": 3e-05, - "loss": 0.0601, + "loss": 0.0576, "step": 3840 }, { "epoch": 40.0, - "eval_LOCATION_f1": 0.8878048780487806, + "eval_LOCATION_f1": 0.8750000000000001, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8921568627450981, + "eval_LOCATION_precision": 0.8666666666666667, "eval_LOCATION_recall": 0.883495145631068, - "eval_ORGANIZATION_f1": 0.9032258064516129, + "eval_ORGANIZATION_f1": 0.8941176470588235, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.9058823529411765, - "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_ORGANIZATION_precision": 0.8994082840236687, + "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.046938493847846985, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.919653893695921, - "eval_overall_precision": 0.9207920792079208, - "eval_overall_recall": 0.9185185185185185, - "eval_runtime": 0.5981, - "eval_samples_per_second": 284.253, - "eval_steps_per_second": 5.016, + "eval_loss": 0.04800355061888695, + "eval_overall_accuracy": 0.983706158519746, + "eval_overall_f1": 0.9124537607891491, + "eval_overall_precision": 0.9113300492610837, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.2713, + "eval_samples_per_second": 626.595, + "eval_steps_per_second": 11.058, "step": 3840 }, { "epoch": 41.0, - "grad_norm": 0.9197700619697571, + "grad_norm": 0.808504581451416, "learning_rate": 2.95e-05, - "loss": 0.0588, + "loss": 0.0597, "step": 3936 }, { "epoch": 41.0, - "eval_LOCATION_f1": 0.9064039408866995, + "eval_LOCATION_f1": 0.8803827751196173, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.92, + "eval_LOCATION_precision": 0.8679245283018868, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.8876080691642652, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8983050847457628, - "eval_ORGANIZATION_recall": 0.9298245614035088, + "eval_ORGANIZATION_precision": 0.875, + "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04637211561203003, - "eval_overall_accuracy": 0.9864678265672466, - "eval_overall_f1": 0.9287469287469288, - "eval_overall_precision": 0.9242053789731052, - "eval_overall_recall": 0.9333333333333333, - "eval_runtime": 0.5998, - "eval_samples_per_second": 283.424, - "eval_steps_per_second": 5.002, + "eval_loss": 0.048507072031497955, + "eval_overall_accuracy": 0.9828776581054958, + "eval_overall_f1": 0.9108669108669109, + "eval_overall_precision": 0.9009661835748792, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2721, + "eval_samples_per_second": 624.82, + "eval_steps_per_second": 11.026, "step": 3936 }, { "epoch": 42.0, - "grad_norm": 0.33734437823295593, + "grad_norm": 0.5790802240371704, "learning_rate": 2.9e-05, - "loss": 0.0576, + "loss": 0.0581, "step": 4032 }, { "epoch": 42.0, - "eval_LOCATION_f1": 0.9064039408866995, + "eval_LOCATION_f1": 0.8761904761904761, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.92, + "eval_LOCATION_precision": 0.8598130841121495, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.9111747851002865, + "eval_ORGANIZATION_f1": 0.881159420289855, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8932584269662921, - "eval_ORGANIZATION_recall": 0.9298245614035088, + "eval_ORGANIZATION_precision": 0.8735632183908046, + "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04518849030137062, - "eval_overall_accuracy": 0.9870201601767468, - "eval_overall_f1": 0.9276073619631902, - "eval_overall_precision": 0.9219512195121952, - "eval_overall_recall": 0.9333333333333333, - "eval_runtime": 0.5992, - "eval_samples_per_second": 283.689, - "eval_steps_per_second": 5.006, + "eval_loss": 0.04726165160536766, + "eval_overall_accuracy": 0.9828776581054958, + "eval_overall_f1": 0.9070904645476773, + "eval_overall_precision": 0.8983050847457628, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.2765, + "eval_samples_per_second": 614.808, + "eval_steps_per_second": 10.85, "step": 4032 }, { "epoch": 43.0, - "grad_norm": 0.9250084757804871, + "grad_norm": 0.46468445658683777, "learning_rate": 2.8499999999999998e-05, - "loss": 0.057, + "loss": 0.0597, "step": 4128 }, { @@ -1323,502 +1323,502 @@ "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8679245283018868, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8941176470588235, + "eval_ORGANIZATION_f1": 0.881159420289855, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8994082840236687, + "eval_ORGANIZATION_precision": 0.8735632183908046, "eval_ORGANIZATION_recall": 0.8888888888888888, - "eval_PERSON_f1": 0.9657794676806084, + "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04571167007088661, - "eval_overall_accuracy": 0.9853631593482464, - "eval_overall_f1": 0.9137931034482758, - "eval_overall_precision": 0.9115479115479116, + "eval_loss": 0.047872934490442276, + "eval_overall_accuracy": 0.9826014913007457, + "eval_overall_f1": 0.9093137254901962, + "eval_overall_precision": 0.902676399026764, "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.5947, - "eval_samples_per_second": 285.867, - "eval_steps_per_second": 5.045, + "eval_runtime": 0.2722, + "eval_samples_per_second": 624.585, + "eval_steps_per_second": 11.022, "step": 4128 }, { "epoch": 44.0, - "grad_norm": 1.0702989101409912, + "grad_norm": 0.5926026105880737, "learning_rate": 2.8000000000000003e-05, - "loss": 0.056, + "loss": 0.0568, "step": 4224 }, { "epoch": 44.0, - "eval_LOCATION_f1": 0.8932038834951457, + "eval_LOCATION_f1": 0.8720379146919431, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_precision": 0.8518518518518519, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8979591836734693, + "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8953488372093024, - "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_ORGANIZATION_precision": 0.8982035928143712, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04532807692885399, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.91871921182266, - "eval_overall_precision": 0.9164619164619164, - "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.6003, - "eval_samples_per_second": 283.17, - "eval_steps_per_second": 4.997, + "eval_loss": 0.04811384528875351, + "eval_overall_accuracy": 0.9826014913007457, + "eval_overall_f1": 0.9088669950738916, + "eval_overall_precision": 0.9066339066339066, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.2715, + "eval_samples_per_second": 626.105, + "eval_steps_per_second": 11.049, "step": 4224 }, { "epoch": 45.0, - "grad_norm": 1.9211422204971313, + "grad_norm": 1.227881669998169, "learning_rate": 2.7500000000000004e-05, - "loss": 0.0557, + "loss": 0.0561, "step": 4320 }, { "epoch": 45.0, - "eval_LOCATION_f1": 0.8761904761904761, + "eval_LOCATION_f1": 0.8720379146919431, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8598130841121495, + "eval_LOCATION_precision": 0.8518518518518519, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8914956011730205, + "eval_ORGANIZATION_f1": 0.8823529411764705, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8941176470588236, - "eval_ORGANIZATION_recall": 0.8888888888888888, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8875739644970414, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.0455101914703846, - "eval_overall_accuracy": 0.9850869925434963, - "eval_overall_f1": 0.9115479115479117, - "eval_overall_precision": 0.9070904645476773, - "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.6588, - "eval_samples_per_second": 258.043, - "eval_steps_per_second": 4.554, + "eval_loss": 0.04695962369441986, + "eval_overall_accuracy": 0.9834299917149959, + "eval_overall_f1": 0.9077490774907748, + "eval_overall_precision": 0.9044117647058824, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.2758, + "eval_samples_per_second": 616.422, + "eval_steps_per_second": 10.878, "step": 4320 }, { "epoch": 46.0, - "grad_norm": 0.976703941822052, + "grad_norm": 1.023648738861084, "learning_rate": 2.7000000000000002e-05, - "loss": 0.0565, + "loss": 0.0552, "step": 4416 }, { "epoch": 46.0, - "eval_LOCATION_f1": 0.8651162790697675, + "eval_LOCATION_f1": 0.8720379146919431, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8303571428571429, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8776119402985074, + "eval_LOCATION_precision": 0.8518518518518519, + "eval_LOCATION_recall": 0.8932038834951457, + "eval_ORGANIZATION_f1": 0.890207715133531, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8963414634146342, - "eval_ORGANIZATION_recall": 0.8596491228070176, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.9036144578313253, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04744512960314751, - "eval_overall_accuracy": 0.9828776581054958, - "eval_overall_f1": 0.902829028290283, - "eval_overall_precision": 0.8995098039215687, - "eval_overall_recall": 0.9061728395061729, - "eval_runtime": 0.5904, - "eval_samples_per_second": 287.926, - "eval_steps_per_second": 5.081, + "eval_loss": 0.04779404029250145, + "eval_overall_accuracy": 0.983706158519746, + "eval_overall_f1": 0.9111111111111111, + "eval_overall_precision": 0.9111111111111111, + "eval_overall_recall": 0.9111111111111111, + "eval_runtime": 0.2711, + "eval_samples_per_second": 627.02, + "eval_steps_per_second": 11.065, "step": 4416 }, { "epoch": 47.0, - "grad_norm": 0.8773436546325684, + "grad_norm": 0.36909419298171997, "learning_rate": 2.6500000000000004e-05, - "loss": 0.0534, + "loss": 0.0562, "step": 4512 }, { "epoch": 47.0, - "eval_LOCATION_f1": 0.8932038834951457, + "eval_LOCATION_f1": 0.8846153846153847, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8932038834951457, + "eval_LOCATION_precision": 0.8761904761904762, "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.8793103448275862, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8908045977011494, - "eval_ORGANIZATION_recall": 0.9064327485380117, + "eval_ORGANIZATION_precision": 0.864406779661017, + "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04476306214928627, - "eval_overall_accuracy": 0.9859154929577465, - "eval_overall_f1": 0.920049200492005, - "eval_overall_precision": 0.9166666666666666, - "eval_overall_recall": 0.9234567901234568, - "eval_runtime": 0.5945, - "eval_samples_per_second": 285.942, - "eval_steps_per_second": 5.046, + "eval_loss": 0.046062689274549484, + "eval_overall_accuracy": 0.983982325324496, + "eval_overall_f1": 0.9095354523227382, + "eval_overall_precision": 0.9007263922518159, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.272, + "eval_samples_per_second": 624.965, + "eval_steps_per_second": 11.029, "step": 4512 }, { "epoch": 48.0, - "grad_norm": 1.278594970703125, + "grad_norm": 0.6459914445877075, "learning_rate": 2.6000000000000002e-05, - "loss": 0.0523, + "loss": 0.0533, "step": 4608 }, { "epoch": 48.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.8826291079812206, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8961424332344213, + "eval_LOCATION_precision": 0.8545454545454545, + "eval_LOCATION_recall": 0.912621359223301, + "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.9096385542168675, - "eval_ORGANIZATION_recall": 0.8830409356725146, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.9085365853658537, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04515359550714493, - "eval_overall_accuracy": 0.9859154929577465, - "eval_overall_f1": 0.9149198520345252, - "eval_overall_precision": 0.9137931034482759, - "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.5953, - "eval_samples_per_second": 285.59, - "eval_steps_per_second": 5.04, + "eval_loss": 0.04743105545639992, + "eval_overall_accuracy": 0.983706158519746, + "eval_overall_f1": 0.9135802469135802, + "eval_overall_precision": 0.9135802469135802, + "eval_overall_recall": 0.9135802469135802, + "eval_runtime": 0.2736, + "eval_samples_per_second": 621.325, + "eval_steps_per_second": 10.965, "step": 4608 }, { "epoch": 49.0, - "grad_norm": 0.56380695104599, + "grad_norm": 0.17258469760417938, "learning_rate": 2.5500000000000003e-05, - "loss": 0.0523, + "loss": 0.0522, "step": 4704 }, { "epoch": 49.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.8785046728971964, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8856304985337242, + "eval_LOCATION_precision": 0.8468468468468469, + "eval_LOCATION_recall": 0.912621359223301, + "eval_ORGANIZATION_f1": 0.8771929824561403, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.888235294117647, - "eval_ORGANIZATION_recall": 0.8830409356725146, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8771929824561403, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04556996375322342, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9104294478527607, - "eval_overall_precision": 0.9048780487804878, + "eval_loss": 0.04609883204102516, + "eval_overall_accuracy": 0.9842584921292461, + "eval_overall_f1": 0.9070904645476773, + "eval_overall_precision": 0.8983050847457628, "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.5918, - "eval_samples_per_second": 287.26, - "eval_steps_per_second": 5.069, + "eval_runtime": 0.2713, + "eval_samples_per_second": 626.665, + "eval_steps_per_second": 11.059, "step": 4704 }, { "epoch": 50.0, - "grad_norm": 1.6369190216064453, + "grad_norm": 1.7320947647094727, "learning_rate": 2.5e-05, - "loss": 0.0509, + "loss": 0.052, "step": 4800 }, { "epoch": 50.0, - "eval_LOCATION_f1": 0.8773584905660378, + "eval_LOCATION_f1": 0.8878504672897196, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8532110091743119, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8802395209580838, + "eval_LOCATION_precision": 0.8558558558558559, + "eval_LOCATION_recall": 0.9223300970873787, + "eval_ORGANIZATION_f1": 0.8869565217391304, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.901840490797546, - "eval_ORGANIZATION_recall": 0.8596491228070176, + "eval_ORGANIZATION_precision": 0.8793103448275862, + "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.046390656381845474, - "eval_overall_accuracy": 0.9828776581054958, - "eval_overall_f1": 0.9084158415841586, - "eval_overall_precision": 0.9106699751861043, - "eval_overall_recall": 0.9061728395061729, - "eval_runtime": 0.6054, - "eval_samples_per_second": 280.8, - "eval_steps_per_second": 4.955, + "eval_loss": 0.04637792333960533, + "eval_overall_accuracy": 0.983982325324496, + "eval_overall_f1": 0.9135200974421438, + "eval_overall_precision": 0.9014423076923077, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.271, + "eval_samples_per_second": 627.338, + "eval_steps_per_second": 11.071, "step": 4800 }, { "epoch": 51.0, - "grad_norm": 0.7033815383911133, + "grad_norm": 0.8508228063583374, "learning_rate": 2.45e-05, - "loss": 0.052, + "loss": 0.054, "step": 4896 }, { "epoch": 51.0, - "eval_LOCATION_f1": 0.8663594470046084, + "eval_LOCATION_f1": 0.8930232558139534, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8245614035087719, - "eval_LOCATION_recall": 0.912621359223301, - "eval_ORGANIZATION_f1": 0.8795180722891567, + "eval_LOCATION_precision": 0.8571428571428571, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8869047619047619, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.906832298136646, - "eval_ORGANIZATION_recall": 0.8538011695906432, + "eval_ORGANIZATION_precision": 0.9030303030303031, + "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04676185920834541, - "eval_overall_accuracy": 0.9842584921292461, - "eval_overall_f1": 0.9050554870530209, - "eval_overall_precision": 0.9039408866995073, - "eval_overall_recall": 0.9061728395061729, - "eval_runtime": 0.5936, - "eval_samples_per_second": 286.386, - "eval_steps_per_second": 5.054, + "eval_loss": 0.046650972217321396, + "eval_overall_accuracy": 0.9853631593482464, + "eval_overall_f1": 0.915129151291513, + "eval_overall_precision": 0.9117647058823529, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.2718, + "eval_samples_per_second": 625.424, + "eval_steps_per_second": 11.037, "step": 4896 }, { "epoch": 52.0, - "grad_norm": 0.6778050065040588, + "grad_norm": 0.6740456223487854, "learning_rate": 2.4e-05, - "loss": 0.0511, + "loss": 0.0525, "step": 4992 }, { "epoch": 52.0, - "eval_LOCATION_f1": 0.8975609756097561, + "eval_LOCATION_f1": 0.9158878504672898, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.9019607843137255, - "eval_LOCATION_recall": 0.8932038834951457, - "eval_ORGANIZATION_f1": 0.8979591836734693, + "eval_LOCATION_precision": 0.8828828828828829, + "eval_LOCATION_recall": 0.9514563106796117, + "eval_ORGANIZATION_f1": 0.872093023255814, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8953488372093024, - "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_ORGANIZATION_precision": 0.8670520231213873, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04431942105293274, - "eval_overall_accuracy": 0.9861916597624966, - "eval_overall_f1": 0.9209876543209876, - "eval_overall_precision": 0.9209876543209876, - "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.6009, - "eval_samples_per_second": 282.922, - "eval_steps_per_second": 4.993, + "eval_loss": 0.04597054049372673, + "eval_overall_accuracy": 0.983982325324496, + "eval_overall_f1": 0.9146341463414634, + "eval_overall_precision": 0.9036144578313253, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2723, + "eval_samples_per_second": 624.335, + "eval_steps_per_second": 11.018, "step": 4992 }, { "epoch": 53.0, - "grad_norm": 0.7755054831504822, + "grad_norm": 1.4370176792144775, "learning_rate": 2.35e-05, - "loss": 0.051, + "loss": 0.0501, "step": 5088 }, { "epoch": 53.0, - "eval_LOCATION_f1": 0.8773584905660378, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8532110091743119, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8875739644970414, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8816568047337278, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8982035928143712, - "eval_ORGANIZATION_recall": 0.8771929824561403, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8922155688622755, + "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045676685869693756, - "eval_overall_accuracy": 0.983982325324496, - "eval_overall_f1": 0.9102091020910209, - "eval_overall_precision": 0.9068627450980392, - "eval_overall_recall": 0.9135802469135802, - "eval_runtime": 0.61, - "eval_samples_per_second": 278.668, - "eval_steps_per_second": 4.918, + "eval_loss": 0.04658225178718567, + "eval_overall_accuracy": 0.9848108257387462, + "eval_overall_f1": 0.9164619164619164, + "eval_overall_precision": 0.9119804400977995, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2733, + "eval_samples_per_second": 622.121, + "eval_steps_per_second": 10.979, "step": 5088 }, { "epoch": 54.0, - "grad_norm": 0.47155851125717163, + "grad_norm": 1.064245343208313, "learning_rate": 2.3000000000000003e-05, - "loss": 0.0483, + "loss": 0.0498, "step": 5184 }, { "epoch": 54.0, - "eval_LOCATION_f1": 0.9029126213592235, + "eval_LOCATION_f1": 0.8962264150943398, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.9029126213592233, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.9017341040462428, + "eval_LOCATION_precision": 0.8715596330275229, + "eval_LOCATION_recall": 0.9223300970873787, + "eval_ORGANIZATION_f1": 0.880466472303207, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8914285714285715, - "eval_ORGANIZATION_recall": 0.9122807017543859, + "eval_ORGANIZATION_precision": 0.877906976744186, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.044774431735277176, - "eval_overall_accuracy": 0.9856393261529964, - "eval_overall_f1": 0.9238329238329238, - "eval_overall_precision": 0.9193154034229829, - "eval_overall_recall": 0.928395061728395, - "eval_runtime": 0.5995, - "eval_samples_per_second": 283.593, - "eval_steps_per_second": 5.005, + "eval_loss": 0.04490743577480316, + "eval_overall_accuracy": 0.983982325324496, + "eval_overall_f1": 0.9130966952264382, + "eval_overall_precision": 0.9053398058252428, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2734, + "eval_samples_per_second": 621.817, + "eval_steps_per_second": 10.973, "step": 5184 }, { "epoch": 55.0, - "grad_norm": 1.5237046480178833, + "grad_norm": 2.0687813758850098, "learning_rate": 2.25e-05, - "loss": 0.0525, + "loss": 0.0504, "step": 5280 }, { "epoch": 55.0, - "eval_LOCATION_f1": 0.8899521531100479, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8773584905660378, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8953488372093024, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8901734104046243, - "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_ORGANIZATION_precision": 0.8982035928143712, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04513506591320038, + "eval_loss": 0.045612651854753494, "eval_overall_accuracy": 0.9853631593482464, - "eval_overall_f1": 0.9177914110429448, - "eval_overall_precision": 0.9121951219512195, + "eval_overall_f1": 0.918918918918919, + "eval_overall_precision": 0.9144254278728606, "eval_overall_recall": 0.9234567901234568, - "eval_runtime": 0.6045, - "eval_samples_per_second": 281.204, - "eval_steps_per_second": 4.962, + "eval_runtime": 0.2766, + "eval_samples_per_second": 614.623, + "eval_steps_per_second": 10.846, "step": 5280 }, { "epoch": 56.0, - "grad_norm": 0.6528781652450562, + "grad_norm": 1.0828304290771484, "learning_rate": 2.2000000000000003e-05, - "loss": 0.0479, + "loss": 0.0486, "step": 5376 }, { "epoch": 56.0, - "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8691588785046729, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8914956011730205, + "eval_LOCATION_precision": 0.8727272727272727, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.881159420289855, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8941176470588236, + "eval_ORGANIZATION_precision": 0.8735632183908046, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04449770227074623, - "eval_overall_accuracy": 0.9850869925434963, - "eval_overall_f1": 0.915129151291513, - "eval_overall_precision": 0.9117647058823529, - "eval_overall_recall": 0.9185185185185185, - "eval_runtime": 0.5981, - "eval_samples_per_second": 284.219, - "eval_steps_per_second": 5.016, + "eval_loss": 0.045335978269577026, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9146341463414634, + "eval_overall_precision": 0.9036144578313253, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2731, + "eval_samples_per_second": 622.495, + "eval_steps_per_second": 10.985, "step": 5376 }, { "epoch": 57.0, - "grad_norm": 1.4258739948272705, + "grad_norm": 1.297290563583374, "learning_rate": 2.15e-05, - "loss": 0.0486, + "loss": 0.0497, "step": 5472 }, { "epoch": 57.0, - "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_f1": 0.8940092165898617, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8691588785046729, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8888888888888888, + "eval_LOCATION_precision": 0.8508771929824561, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8888888888888888, - "eval_ORGANIZATION_recall": 0.8888888888888888, - "eval_PERSON_f1": 0.9657794676806084, + "eval_ORGANIZATION_precision": 0.8982035928143712, + "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, - "eval_PERSON_precision": 0.9621212121212122, + "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.044527389109134674, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9128834355828221, - "eval_overall_precision": 0.9073170731707317, - "eval_overall_recall": 0.9185185185185185, - "eval_runtime": 0.597, - "eval_samples_per_second": 284.74, - "eval_steps_per_second": 5.025, + "eval_loss": 0.045681606978178024, + "eval_overall_accuracy": 0.9845346589339962, + "eval_overall_f1": 0.9155446756425949, + "eval_overall_precision": 0.9077669902912622, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2721, + "eval_samples_per_second": 624.822, + "eval_steps_per_second": 11.026, "step": 5472 }, { "epoch": 58.0, - "grad_norm": 0.6324487924575806, + "grad_norm": 0.472746878862381, "learning_rate": 2.1e-05, - "loss": 0.0457, + "loss": 0.0487, "step": 5568 }, { "epoch": 58.0, - "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8691588785046729, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8947368421052632, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.890207715133531, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8947368421052632, - "eval_ORGANIZATION_recall": 0.8947368421052632, + "eval_ORGANIZATION_precision": 0.9036144578313253, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04367503523826599, - "eval_overall_accuracy": 0.9856393261529964, - "eval_overall_f1": 0.9164619164619164, - "eval_overall_precision": 0.9119804400977995, - "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.6033, - "eval_samples_per_second": 281.778, - "eval_steps_per_second": 4.973, + "eval_loss": 0.04600377753376961, + "eval_overall_accuracy": 0.9853631593482464, + "eval_overall_f1": 0.920049200492005, + "eval_overall_precision": 0.9166666666666666, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2747, + "eval_samples_per_second": 618.786, + "eval_steps_per_second": 10.92, "step": 5568 }, { "epoch": 59.0, - "grad_norm": 1.5328294038772583, + "grad_norm": 0.6368147730827332, "learning_rate": 2.05e-05, - "loss": 0.0478, + "loss": 0.0473, "step": 5664 }, { "epoch": 59.0, - "eval_LOCATION_f1": 0.8773584905660378, + "eval_LOCATION_f1": 0.8878504672897196, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8532110091743119, - "eval_LOCATION_recall": 0.9029126213592233, + "eval_LOCATION_precision": 0.8558558558558559, + "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8816568047337278, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8922155688622755, @@ -1827,1295 +1827,1295 @@ "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04407254979014397, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9088669950738916, - "eval_overall_precision": 0.9066339066339066, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.6034, - "eval_samples_per_second": 281.724, - "eval_steps_per_second": 4.972, + "eval_loss": 0.045563556253910065, + "eval_overall_accuracy": 0.9850869925434963, + "eval_overall_f1": 0.9115479115479117, + "eval_overall_precision": 0.9070904645476773, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.2709, + "eval_samples_per_second": 627.644, + "eval_steps_per_second": 11.076, "step": 5664 }, { "epoch": 60.0, - "grad_norm": 0.5337828993797302, + "grad_norm": 0.35717150568962097, "learning_rate": 2e-05, - "loss": 0.0472, + "loss": 0.0463, "step": 5760 }, { "epoch": 60.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.8878504672897196, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8849557522123894, + "eval_LOCATION_precision": 0.8558558558558559, + "eval_LOCATION_recall": 0.9223300970873787, + "eval_ORGANIZATION_f1": 0.8809523809523809, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8928571428571429, - "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_ORGANIZATION_precision": 0.896969696969697, + "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04403013363480568, - "eval_overall_accuracy": 0.9850869925434963, + "eval_loss": 0.04541854187846184, + "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9113300492610836, "eval_overall_precision": 0.9090909090909091, "eval_overall_recall": 0.9135802469135802, - "eval_runtime": 0.5975, - "eval_samples_per_second": 284.5, - "eval_steps_per_second": 5.021, + "eval_runtime": 0.2737, + "eval_samples_per_second": 621.148, + "eval_steps_per_second": 10.961, "step": 5760 }, { "epoch": 61.0, - "grad_norm": 0.9366702437400818, + "grad_norm": 1.0991231203079224, "learning_rate": 1.9500000000000003e-05, - "loss": 0.049, + "loss": 0.0486, "step": 5856 }, { "epoch": 61.0, - "eval_LOCATION_f1": 0.8942307692307693, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8857142857142857, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8895348837209303, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8809523809523809, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.884393063583815, - "eval_ORGANIZATION_recall": 0.8947368421052632, + "eval_ORGANIZATION_precision": 0.896969696969697, + "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04451928660273552, - "eval_overall_accuracy": 0.9853631593482464, - "eval_overall_f1": 0.9164619164619164, - "eval_overall_precision": 0.9119804400977995, - "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.6062, - "eval_samples_per_second": 280.441, - "eval_steps_per_second": 4.949, + "eval_loss": 0.045604679733514786, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.916256157635468, + "eval_overall_precision": 0.914004914004914, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.2735, + "eval_samples_per_second": 621.534, + "eval_steps_per_second": 10.968, "step": 5856 }, { "epoch": 62.0, - "grad_norm": 1.8338637351989746, + "grad_norm": 1.2870984077453613, "learning_rate": 1.9e-05, - "loss": 0.0476, + "loss": 0.0484, "step": 5952 }, { "epoch": 62.0, - "eval_LOCATION_f1": 0.869158878504673, + "eval_LOCATION_f1": 0.8930232558139534, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8378378378378378, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.875739644970414, + "eval_LOCATION_precision": 0.8571428571428571, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8809523809523809, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8862275449101796, + "eval_ORGANIZATION_precision": 0.896969696969697, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045562200248241425, - "eval_overall_accuracy": 0.983982325324496, - "eval_overall_f1": 0.9041769041769042, - "eval_overall_precision": 0.8997555012224939, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.6542, - "eval_samples_per_second": 259.871, - "eval_steps_per_second": 4.586, + "eval_loss": 0.04645540565252304, + "eval_overall_accuracy": 0.9850869925434963, + "eval_overall_f1": 0.9126691266912669, + "eval_overall_precision": 0.9093137254901961, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.2716, + "eval_samples_per_second": 626.017, + "eval_steps_per_second": 11.047, "step": 5952 }, { "epoch": 63.0, - "grad_norm": 2.0841386318206787, + "grad_norm": 1.264487862586975, "learning_rate": 1.85e-05, - "loss": 0.0457, + "loss": 0.0461, "step": 6048 }, { "epoch": 63.0, - "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_f1": 0.9116279069767441, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8691588785046729, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8979591836734693, + "eval_LOCATION_precision": 0.875, + "eval_LOCATION_recall": 0.9514563106796117, + "eval_ORGANIZATION_f1": 0.8908554572271385, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8953488372093024, - "eval_ORGANIZATION_recall": 0.9005847953216374, + "eval_ORGANIZATION_precision": 0.8988095238095238, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.043963029980659485, - "eval_overall_accuracy": 0.9859154929577465, - "eval_overall_f1": 0.9177914110429448, - "eval_overall_precision": 0.9121951219512195, - "eval_overall_recall": 0.9234567901234568, - "eval_runtime": 0.6065, - "eval_samples_per_second": 280.291, - "eval_steps_per_second": 4.946, + "eval_loss": 0.04506193846464157, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.9215686274509803, + "eval_overall_precision": 0.9148418491484185, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.2722, + "eval_samples_per_second": 624.428, + "eval_steps_per_second": 11.019, "step": 6048 }, { "epoch": 64.0, - "grad_norm": 0.9337228536605835, + "grad_norm": 0.35966232419013977, "learning_rate": 1.8e-05, - "loss": 0.0446, + "loss": 0.0455, "step": 6144 }, { "epoch": 64.0, - "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8691588785046729, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8882352941176469, + "eval_LOCATION_precision": 0.8727272727272727, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8842729970326408, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.893491124260355, - "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_ORGANIZATION_precision": 0.8975903614457831, + "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04478294402360916, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.9137931034482758, - "eval_overall_precision": 0.9115479115479116, - "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.5976, - "eval_samples_per_second": 284.482, - "eval_steps_per_second": 5.02, + "eval_loss": 0.04505002498626709, + "eval_overall_accuracy": 0.9853631593482464, + "eval_overall_f1": 0.916256157635468, + "eval_overall_precision": 0.914004914004914, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.2717, + "eval_samples_per_second": 625.717, + "eval_steps_per_second": 11.042, "step": 6144 }, { "epoch": 65.0, - "grad_norm": 1.0935461521148682, + "grad_norm": 0.5010517239570618, "learning_rate": 1.75e-05, - "loss": 0.0465, + "loss": 0.0472, "step": 6240 }, { "epoch": 65.0, - "eval_LOCATION_f1": 0.8826291079812206, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8545454545454545, - "eval_LOCATION_recall": 0.912621359223301, - "eval_ORGANIZATION_f1": 0.8908554572271385, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8869047619047619, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8988095238095238, - "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_ORGANIZATION_precision": 0.9030303030303031, + "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04469531029462814, - "eval_overall_accuracy": 0.9856393261529964, - "eval_overall_f1": 0.914004914004914, - "eval_overall_precision": 0.9095354523227384, - "eval_overall_recall": 0.9185185185185185, - "eval_runtime": 0.603, - "eval_samples_per_second": 281.935, - "eval_steps_per_second": 4.975, + "eval_loss": 0.04529740288853645, + "eval_overall_accuracy": 0.9859154929577465, + "eval_overall_f1": 0.91871921182266, + "eval_overall_precision": 0.9164619164619164, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2726, + "eval_samples_per_second": 623.611, + "eval_steps_per_second": 11.005, "step": 6240 }, { "epoch": 66.0, - "grad_norm": 0.588843584060669, + "grad_norm": 0.859156608581543, "learning_rate": 1.7000000000000003e-05, - "loss": 0.0456, + "loss": 0.0453, "step": 6336 }, { "epoch": 66.0, - "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8691588785046729, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8842729970326408, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8975903614457831, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9146341463414634, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04511041194200516, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9122373300370827, - "eval_overall_precision": 0.9133663366336634, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.5959, - "eval_samples_per_second": 285.269, - "eval_steps_per_second": 5.034, + "eval_loss": 0.04506004601716995, + "eval_overall_accuracy": 0.9864678265672466, + "eval_overall_f1": 0.9223181257706535, + "eval_overall_precision": 0.9211822660098522, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.273, + "eval_samples_per_second": 622.688, + "eval_steps_per_second": 10.989, "step": 6336 }, { "epoch": 67.0, - "grad_norm": 0.40730783343315125, + "grad_norm": 0.3856222927570343, "learning_rate": 1.65e-05, - "loss": 0.043, + "loss": 0.045, "step": 6432 }, { "epoch": 67.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.874251497005988, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8957055214723927, - "eval_ORGANIZATION_recall": 0.8538011695906432, + "eval_ORGANIZATION_precision": 0.9085365853658537, + "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04662081599235535, - "eval_overall_accuracy": 0.9823253244959956, - "eval_overall_f1": 0.9070631970260223, - "eval_overall_precision": 0.9104477611940298, - "eval_overall_recall": 0.9037037037037037, - "eval_runtime": 0.5968, - "eval_samples_per_second": 284.867, - "eval_steps_per_second": 5.027, + "eval_loss": 0.04503821209073067, + "eval_overall_accuracy": 0.9856393261529964, + "eval_overall_f1": 0.9198520345252774, + "eval_overall_precision": 0.9187192118226601, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2725, + "eval_samples_per_second": 623.806, + "eval_steps_per_second": 11.008, "step": 6432 }, { "epoch": 68.0, - "grad_norm": 1.010898470878601, + "grad_norm": 0.5940878987312317, "learning_rate": 1.6000000000000003e-05, - "loss": 0.0441, + "loss": 0.0466, "step": 6528 }, { "epoch": 68.0, - "eval_LOCATION_f1": 0.8909952606635072, + "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8703703703703703, - "eval_LOCATION_recall": 0.912621359223301, - "eval_ORGANIZATION_f1": 0.879765395894428, + "eval_LOCATION_precision": 0.8909090909090909, + "eval_LOCATION_recall": 0.9514563106796117, + "eval_ORGANIZATION_f1": 0.890207715133531, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8823529411764706, + "eval_ORGANIZATION_precision": 0.9036144578313253, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04551886394619942, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.9115479115479117, - "eval_overall_precision": 0.9070904645476773, - "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.6631, - "eval_samples_per_second": 256.37, - "eval_steps_per_second": 4.524, + "eval_loss": 0.044038835912942886, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.9236453201970444, + "eval_overall_precision": 0.9213759213759214, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2728, + "eval_samples_per_second": 623.229, + "eval_steps_per_second": 10.998, "step": 6528 }, { "epoch": 69.0, - "grad_norm": 0.356332927942276, + "grad_norm": 1.601568579673767, "learning_rate": 1.55e-05, - "loss": 0.0439, + "loss": 0.046, "step": 6624 }, { "epoch": 69.0, - "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_f1": 0.8971962616822431, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8691588785046729, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.879765395894428, + "eval_LOCATION_precision": 0.8648648648648649, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8823529411764706, - "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_ORGANIZATION_precision": 0.9085365853658537, + "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04542705416679382, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9102091020910209, - "eval_overall_precision": 0.9068627450980392, - "eval_overall_recall": 0.9135802469135802, - "eval_runtime": 0.5976, - "eval_samples_per_second": 284.455, - "eval_steps_per_second": 5.02, + "eval_loss": 0.04461726173758507, + "eval_overall_accuracy": 0.9853631593482464, + "eval_overall_f1": 0.9173859432799014, + "eval_overall_precision": 0.916256157635468, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.2738, + "eval_samples_per_second": 620.821, + "eval_steps_per_second": 10.956, "step": 6624 }, { "epoch": 70.0, - "grad_norm": 0.5880185961723328, + "grad_norm": 2.521439552307129, "learning_rate": 1.5e-05, - "loss": 0.0439, + "loss": 0.0436, "step": 6720 }, { "epoch": 70.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.8971962616822431, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8882352941176469, + "eval_LOCATION_precision": 0.8648648648648649, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.893491124260355, - "eval_ORGANIZATION_recall": 0.8830409356725146, + "eval_ORGANIZATION_precision": 0.9090909090909091, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.044994134455919266, - "eval_overall_accuracy": 0.9850869925434963, - "eval_overall_f1": 0.9126691266912669, - "eval_overall_precision": 0.9093137254901961, - "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.597, - "eval_samples_per_second": 284.742, - "eval_steps_per_second": 5.025, + "eval_loss": 0.04403162747621536, + "eval_overall_accuracy": 0.9861916597624966, + "eval_overall_f1": 0.91871921182266, + "eval_overall_precision": 0.9164619164619164, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.275, + "eval_samples_per_second": 618.275, + "eval_steps_per_second": 10.911, "step": 6720 }, { "epoch": 71.0, - "grad_norm": 0.9274646639823914, + "grad_norm": 0.8414034247398376, "learning_rate": 1.45e-05, - "loss": 0.0428, + "loss": 0.0445, "step": 6816 }, { "epoch": 71.0, - "eval_LOCATION_f1": 0.8952380952380954, + "eval_LOCATION_f1": 0.9074074074074074, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8785046728971962, - "eval_LOCATION_recall": 0.912621359223301, - "eval_ORGANIZATION_f1": 0.8888888888888888, + "eval_LOCATION_precision": 0.8672566371681416, + "eval_LOCATION_recall": 0.9514563106796117, + "eval_ORGANIZATION_f1": 0.8908554572271385, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8888888888888888, - "eval_ORGANIZATION_recall": 0.8888888888888888, + "eval_ORGANIZATION_precision": 0.8988095238095238, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04472970589995384, - "eval_overall_accuracy": 0.9859154929577465, - "eval_overall_f1": 0.9164619164619164, - "eval_overall_precision": 0.9119804400977995, - "eval_overall_recall": 0.9209876543209876, - "eval_runtime": 0.5987, - "eval_samples_per_second": 283.931, - "eval_steps_per_second": 5.011, + "eval_loss": 0.0446244440972805, + "eval_overall_accuracy": 0.9861916597624966, + "eval_overall_f1": 0.9204406364749083, + "eval_overall_precision": 0.912621359223301, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.2729, + "eval_samples_per_second": 623.038, + "eval_steps_per_second": 10.995, "step": 6816 }, { "epoch": 72.0, - "grad_norm": 0.35504868626594543, + "grad_norm": 0.7483597993850708, "learning_rate": 1.4000000000000001e-05, - "loss": 0.0438, + "loss": 0.0437, "step": 6912 }, { "epoch": 72.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8795180722891567, + "eval_LOCATION_precision": 0.8727272727272727, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8835820895522388, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.906832298136646, - "eval_ORGANIZATION_recall": 0.8538011695906432, + "eval_ORGANIZATION_precision": 0.9024390243902439, + "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.046420272439718246, - "eval_overall_accuracy": 0.9828776581054958, - "eval_overall_f1": 0.9093167701863354, - "eval_overall_precision": 0.915, - "eval_overall_recall": 0.9037037037037037, - "eval_runtime": 0.5929, - "eval_samples_per_second": 286.713, - "eval_steps_per_second": 5.06, + "eval_loss": 0.04594691842794418, + "eval_overall_accuracy": 0.9853631593482464, + "eval_overall_f1": 0.9160493827160494, + "eval_overall_precision": 0.9160493827160494, + "eval_overall_recall": 0.9160493827160494, + "eval_runtime": 0.2711, + "eval_samples_per_second": 627.021, + "eval_steps_per_second": 11.065, "step": 6912 }, { "epoch": 73.0, - "grad_norm": 1.1146135330200195, + "grad_norm": 1.4202793836593628, "learning_rate": 1.3500000000000001e-05, - "loss": 0.0431, + "loss": 0.0434, "step": 7008 }, { "epoch": 73.0, - "eval_LOCATION_f1": 0.8867924528301887, + "eval_LOCATION_f1": 0.9116279069767441, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8623853211009175, - "eval_LOCATION_recall": 0.912621359223301, - "eval_ORGANIZATION_f1": 0.8823529411764705, + "eval_LOCATION_precision": 0.875, + "eval_LOCATION_recall": 0.9514563106796117, + "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8875739644970414, + "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.044833190739154816, - "eval_overall_accuracy": 0.9856393261529964, - "eval_overall_f1": 0.9115479115479117, - "eval_overall_precision": 0.9070904645476773, - "eval_overall_recall": 0.9160493827160494, - "eval_runtime": 0.606, - "eval_samples_per_second": 280.509, - "eval_steps_per_second": 4.95, + "eval_loss": 0.0443597137928009, + "eval_overall_accuracy": 0.9861916597624966, + "eval_overall_f1": 0.922509225092251, + "eval_overall_precision": 0.9191176470588235, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2743, + "eval_samples_per_second": 619.819, + "eval_steps_per_second": 10.938, "step": 7008 }, { "epoch": 74.0, - "grad_norm": 0.3997597396373749, + "grad_norm": 0.6511960625648499, "learning_rate": 1.3000000000000001e-05, - "loss": 0.0415, + "loss": 0.0441, "step": 7104 }, { "epoch": 74.0, - "eval_LOCATION_f1": 0.8773584905660378, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8532110091743119, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.875739644970414, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8862275449101796, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9090909090909091, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04576844349503517, - "eval_overall_accuracy": 0.9842584921292461, - "eval_overall_f1": 0.9064039408866995, - "eval_overall_precision": 0.9041769041769042, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.6179, - "eval_samples_per_second": 275.116, - "eval_steps_per_second": 4.855, + "eval_loss": 0.044465843588113785, + "eval_overall_accuracy": 0.9861916597624966, + "eval_overall_f1": 0.9211822660098522, + "eval_overall_precision": 0.918918918918919, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2712, + "eval_samples_per_second": 626.86, + "eval_steps_per_second": 11.062, "step": 7104 }, { "epoch": 75.0, - "grad_norm": 0.6900985836982727, + "grad_norm": 0.9006913900375366, "learning_rate": 1.25e-05, - "loss": 0.0429, + "loss": 0.0439, "step": 7200 }, { "epoch": 75.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8809523809523809, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9090909090909091, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.046131208539009094, - "eval_overall_accuracy": 0.983982325324496, - "eval_overall_f1": 0.9064039408866995, - "eval_overall_precision": 0.9041769041769042, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.6007, - "eval_samples_per_second": 283.003, - "eval_steps_per_second": 4.994, + "eval_loss": 0.04460127279162407, + "eval_overall_accuracy": 0.9861916597624966, + "eval_overall_f1": 0.9211822660098522, + "eval_overall_precision": 0.918918918918919, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2727, + "eval_samples_per_second": 623.458, + "eval_steps_per_second": 11.002, "step": 7200 }, { "epoch": 76.0, - "grad_norm": 0.35220813751220703, + "grad_norm": 0.4795643389225006, "learning_rate": 1.2e-05, - "loss": 0.0426, + "loss": 0.042, "step": 7296 }, { "epoch": 76.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_LOCATION_precision": 0.8660714285714286, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8809523809523809, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.8982035928143712, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04544881358742714, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.9064039408866995, - "eval_overall_precision": 0.9041769041769042, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.5968, - "eval_samples_per_second": 284.861, - "eval_steps_per_second": 5.027, + "eval_loss": 0.044701043516397476, + "eval_overall_accuracy": 0.9859154929577465, + "eval_overall_f1": 0.9177914110429448, + "eval_overall_precision": 0.9121951219512195, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2713, + "eval_samples_per_second": 626.658, + "eval_steps_per_second": 11.059, "step": 7296 }, { "epoch": 77.0, - "grad_norm": 0.7620628476142883, + "grad_norm": 0.5350348949432373, "learning_rate": 1.1500000000000002e-05, - "loss": 0.043, + "loss": 0.0428, "step": 7392 }, { "epoch": 77.0, - "eval_LOCATION_f1": 0.8857142857142858, + "eval_LOCATION_f1": 0.8971962616822431, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8691588785046729, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.875739644970414, + "eval_LOCATION_precision": 0.8648648648648649, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8862275449101796, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9085365853658537, + "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04560285061597824, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.908641975308642, - "eval_overall_precision": 0.908641975308642, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.5922, - "eval_samples_per_second": 287.086, - "eval_steps_per_second": 5.066, + "eval_loss": 0.04485779628157616, + "eval_overall_accuracy": 0.9864678265672466, + "eval_overall_f1": 0.9173859432799014, + "eval_overall_precision": 0.916256157635468, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.2726, + "eval_samples_per_second": 623.573, + "eval_steps_per_second": 11.004, "step": 7392 }, { "epoch": 78.0, - "grad_norm": 0.6972180008888245, + "grad_norm": 0.3859386444091797, "learning_rate": 1.1000000000000001e-05, - "loss": 0.0397, + "loss": 0.0435, "step": 7488 }, { "epoch": 78.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8842729970326408, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8975903614457831, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9090909090909091, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045004814863204956, - "eval_overall_accuracy": 0.9856393261529964, - "eval_overall_f1": 0.9111111111111111, - "eval_overall_precision": 0.9111111111111111, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.5949, - "eval_samples_per_second": 285.753, - "eval_steps_per_second": 5.043, + "eval_loss": 0.044405534863471985, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.9211822660098522, + "eval_overall_precision": 0.918918918918919, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2722, + "eval_samples_per_second": 624.535, + "eval_steps_per_second": 11.021, "step": 7488 }, { "epoch": 79.0, - "grad_norm": 1.175458312034607, + "grad_norm": 1.7985838651657104, "learning_rate": 1.05e-05, - "loss": 0.0411, + "loss": 0.0416, "step": 7584 }, { "epoch": 79.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_LOCATION_precision": 0.8660714285714286, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8994082840236686, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8809523809523809, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9101796407185628, + "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04489603638648987, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9064039408866995, - "eval_overall_precision": 0.9041769041769042, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.5924, - "eval_samples_per_second": 286.953, - "eval_steps_per_second": 5.064, + "eval_loss": 0.04388272389769554, + "eval_overall_accuracy": 0.9861916597624966, + "eval_overall_f1": 0.9226993865030675, + "eval_overall_precision": 0.9170731707317074, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.2714, + "eval_samples_per_second": 626.402, + "eval_steps_per_second": 11.054, "step": 7584 }, { "epoch": 80.0, - "grad_norm": 1.935158610343933, + "grad_norm": 0.8757291436195374, "learning_rate": 1e-05, - "loss": 0.0417, + "loss": 0.0414, "step": 7680 }, { "epoch": 80.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8823529411764705, + "eval_LOCATION_precision": 0.8727272727272727, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8961424332344213, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8875739644970414, - "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_ORGANIZATION_precision": 0.9096385542168675, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04481157660484314, - "eval_overall_accuracy": 0.9856393261529964, - "eval_overall_f1": 0.9102091020910209, - "eval_overall_precision": 0.9068627450980392, - "eval_overall_recall": 0.9135802469135802, - "eval_runtime": 0.6085, - "eval_samples_per_second": 279.37, - "eval_steps_per_second": 4.93, + "eval_loss": 0.04357600957155228, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.9211822660098522, + "eval_overall_precision": 0.918918918918919, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2731, + "eval_samples_per_second": 622.407, + "eval_steps_per_second": 10.984, "step": 7680 }, { "epoch": 81.0, - "grad_norm": 0.7792730927467346, + "grad_norm": 0.4684378504753113, "learning_rate": 9.5e-06, - "loss": 0.0423, + "loss": 0.043, "step": 7776 }, { "epoch": 81.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8823529411764705, + "eval_LOCATION_precision": 0.8727272727272727, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8875739644970414, - "eval_ORGANIZATION_recall": 0.8771929824561403, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04457651078701019, - "eval_overall_accuracy": 0.9856393261529964, - "eval_overall_f1": 0.9102091020910209, - "eval_overall_precision": 0.9068627450980392, - "eval_overall_recall": 0.9135802469135802, - "eval_runtime": 0.6034, - "eval_samples_per_second": 281.758, - "eval_steps_per_second": 4.972, + "eval_loss": 0.043720223009586334, + "eval_overall_accuracy": 0.9870201601767468, + "eval_overall_f1": 0.9223181257706535, + "eval_overall_precision": 0.9211822660098522, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2724, + "eval_samples_per_second": 624.084, + "eval_steps_per_second": 11.013, "step": 7776 }, { "epoch": 82.0, - "grad_norm": 1.640648365020752, + "grad_norm": 0.9328840970993042, "learning_rate": 9e-06, - "loss": 0.0434, + "loss": 0.0433, "step": 7872 }, { "epoch": 82.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8994082840236686, + "eval_LOCATION_precision": 0.8818181818181818, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.9020771513353115, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.9101796407185628, + "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04452081397175789, - "eval_overall_accuracy": 0.9861916597624966, - "eval_overall_f1": 0.9173859432799014, - "eval_overall_precision": 0.916256157635468, - "eval_overall_recall": 0.9185185185185185, - "eval_runtime": 0.6038, - "eval_samples_per_second": 281.535, - "eval_steps_per_second": 4.968, + "eval_loss": 0.04343878850340843, + "eval_overall_accuracy": 0.9872963269814968, + "eval_overall_f1": 0.9261083743842363, + "eval_overall_precision": 0.9238329238329238, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.2728, + "eval_samples_per_second": 623.154, + "eval_steps_per_second": 10.997, "step": 7872 }, { "epoch": 83.0, - "grad_norm": 0.8310399651527405, + "grad_norm": 0.808896005153656, "learning_rate": 8.500000000000002e-06, - "loss": 0.0394, + "loss": 0.0428, "step": 7968 }, { "epoch": 83.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_LOCATION_precision": 0.8660714285714286, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.9047619047619047, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8809523809523809, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9212121212121213, + "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04489120468497276, - "eval_overall_accuracy": 0.9850869925434963, - "eval_overall_f1": 0.9064039408866995, - "eval_overall_precision": 0.9041769041769042, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.6046, - "eval_samples_per_second": 281.181, - "eval_steps_per_second": 4.962, + "eval_loss": 0.043899569660425186, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.9249692496924968, + "eval_overall_precision": 0.9215686274509803, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.2716, + "eval_samples_per_second": 625.96, + "eval_steps_per_second": 11.046, "step": 7968 }, { "epoch": 84.0, - "grad_norm": 0.9918228387832642, + "grad_norm": 0.7827540040016174, "learning_rate": 8.000000000000001e-06, - "loss": 0.0413, + "loss": 0.0418, "step": 8064 }, { "epoch": 84.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8809523809523809, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.9020771513353115, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.896969696969697, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9156626506024096, + "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04544991999864578, - "eval_overall_accuracy": 0.9850869925434963, - "eval_overall_f1": 0.9097651421508034, - "eval_overall_precision": 0.9108910891089109, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.6004, - "eval_samples_per_second": 283.156, - "eval_steps_per_second": 4.997, + "eval_loss": 0.04345087707042694, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.9249692496924968, + "eval_overall_precision": 0.9215686274509803, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.2707, + "eval_samples_per_second": 628.109, + "eval_steps_per_second": 11.084, "step": 8064 }, { "epoch": 85.0, - "grad_norm": 0.9799830317497253, + "grad_norm": 0.7387497425079346, "learning_rate": 7.5e-06, - "loss": 0.0408, + "loss": 0.0416, "step": 8160 }, { "epoch": 85.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8783382789317508, + "eval_LOCATION_precision": 0.8727272727272727, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.891566265060241, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045652709901332855, - "eval_overall_accuracy": 0.9842584921292461, - "eval_overall_f1": 0.908641975308642, - "eval_overall_precision": 0.908641975308642, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.604, - "eval_samples_per_second": 281.458, - "eval_steps_per_second": 4.967, + "eval_loss": 0.04350125417113304, + "eval_overall_accuracy": 0.9870201601767468, + "eval_overall_f1": 0.9223181257706535, + "eval_overall_precision": 0.9211822660098522, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2731, + "eval_samples_per_second": 622.486, + "eval_steps_per_second": 10.985, "step": 8160 }, { "epoch": 86.0, - "grad_norm": 1.889784574508667, + "grad_norm": 0.7260624170303345, "learning_rate": 7.000000000000001e-06, - "loss": 0.0412, + "loss": 0.0413, "step": 8256 }, { "epoch": 86.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.9047619047619047, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8809523809523809, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9212121212121213, + "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045468661934137344, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.9064039408866995, - "eval_overall_precision": 0.9041769041769042, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.6007, - "eval_samples_per_second": 283.007, - "eval_steps_per_second": 4.994, + "eval_loss": 0.04387590289115906, + "eval_overall_accuracy": 0.9872963269814968, + "eval_overall_f1": 0.9261083743842363, + "eval_overall_precision": 0.9238329238329238, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.2733, + "eval_samples_per_second": 622.03, + "eval_steps_per_second": 10.977, "step": 8256 }, { "epoch": 87.0, - "grad_norm": 1.0306001901626587, + "grad_norm": 0.7930686473846436, "learning_rate": 6.5000000000000004e-06, - "loss": 0.0425, + "loss": 0.0423, "step": 8352 }, { "epoch": 87.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8731563421828908, + "eval_LOCATION_precision": 0.8727272727272727, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8809523809523809, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9085365853658537, + "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045380450785160065, - "eval_overall_accuracy": 0.9842584921292461, - "eval_overall_f1": 0.9064039408866995, - "eval_overall_precision": 0.9041769041769042, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.607, - "eval_samples_per_second": 280.052, - "eval_steps_per_second": 4.942, + "eval_loss": 0.04400918632745743, + "eval_overall_accuracy": 0.9864678265672466, + "eval_overall_f1": 0.9185185185185185, + "eval_overall_precision": 0.9185185185185185, + "eval_overall_recall": 0.9185185185185185, + "eval_runtime": 0.2715, + "eval_samples_per_second": 626.246, + "eval_steps_per_second": 11.051, "step": 8352 }, { "epoch": 88.0, - "grad_norm": 1.6095170974731445, + "grad_norm": 0.6405452489852905, "learning_rate": 6e-06, - "loss": 0.0404, + "loss": 0.0409, "step": 8448 }, { "epoch": 88.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_LOCATION_precision": 0.8818181818181818, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8816568047337278, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9146341463414634, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045171529054641724, - "eval_overall_accuracy": 0.9850869925434963, - "eval_overall_f1": 0.9077490774907748, - "eval_overall_precision": 0.9044117647058824, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.5928, - "eval_samples_per_second": 286.772, - "eval_steps_per_second": 5.061, + "eval_loss": 0.04390283301472664, + "eval_overall_accuracy": 0.9870201601767468, + "eval_overall_f1": 0.9234567901234567, + "eval_overall_precision": 0.9234567901234568, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2725, + "eval_samples_per_second": 623.922, + "eval_steps_per_second": 11.01, "step": 8448 }, { "epoch": 89.0, - "grad_norm": 1.9292471408843994, + "grad_norm": 0.8073258996009827, "learning_rate": 5.500000000000001e-06, - "loss": 0.0403, + "loss": 0.0419, "step": 8544 }, { "epoch": 89.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.875739644970414, + "eval_LOCATION_precision": 0.8660714285714286, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.9047619047619047, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8862275449101796, - "eval_ORGANIZATION_recall": 0.8654970760233918, + "eval_ORGANIZATION_precision": 0.9212121212121213, + "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04589909315109253, - "eval_overall_accuracy": 0.9842584921292461, - "eval_overall_f1": 0.9075215782983971, - "eval_overall_precision": 0.9064039408866995, - "eval_overall_recall": 0.908641975308642, - "eval_runtime": 0.6049, - "eval_samples_per_second": 281.046, - "eval_steps_per_second": 4.96, + "eval_loss": 0.043653201311826706, + "eval_overall_accuracy": 0.9870201601767468, + "eval_overall_f1": 0.9249692496924968, + "eval_overall_precision": 0.9215686274509803, + "eval_overall_recall": 0.928395061728395, + "eval_runtime": 0.271, + "eval_samples_per_second": 627.261, + "eval_steps_per_second": 11.069, "step": 8544 }, { "epoch": 90.0, - "grad_norm": 0.9496196508407593, + "grad_norm": 0.8635096549987793, "learning_rate": 5e-06, - "loss": 0.0395, + "loss": 0.0424, "step": 8640 }, { "epoch": 90.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_LOCATION_precision": 0.8660714285714286, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8764705882352941, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04514380544424057, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.9066339066339066, - "eval_overall_precision": 0.902200488997555, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.6057, - "eval_samples_per_second": 280.676, - "eval_steps_per_second": 4.953, + "eval_loss": 0.043814968317747116, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.922509225092251, + "eval_overall_precision": 0.9191176470588235, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2729, + "eval_samples_per_second": 622.855, + "eval_steps_per_second": 10.992, "step": 8640 }, { "epoch": 91.0, - "grad_norm": 1.498864769935608, + "grad_norm": 0.7420637607574463, "learning_rate": 4.5e-06, - "loss": 0.0413, + "loss": 0.0419, "step": 8736 }, { "epoch": 91.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8764705882352941, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9146341463414634, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04504216089844704, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9066339066339066, - "eval_overall_precision": 0.902200488997555, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.6, - "eval_samples_per_second": 283.336, - "eval_steps_per_second": 5.0, + "eval_loss": 0.04388648644089699, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.9223181257706535, + "eval_overall_precision": 0.9211822660098522, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2731, + "eval_samples_per_second": 622.382, + "eval_steps_per_second": 10.983, "step": 8736 }, { "epoch": 92.0, - "grad_norm": 0.8939065337181091, + "grad_norm": 0.2291034311056137, "learning_rate": 4.000000000000001e-06, - "loss": 0.039, + "loss": 0.0427, "step": 8832 }, { "epoch": 92.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8764705882352941, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9146341463414634, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04519949480891228, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9066339066339066, - "eval_overall_precision": 0.902200488997555, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.6015, - "eval_samples_per_second": 282.646, - "eval_steps_per_second": 4.988, + "eval_loss": 0.044274233281612396, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.9223181257706535, + "eval_overall_precision": 0.9211822660098522, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2719, + "eval_samples_per_second": 625.194, + "eval_steps_per_second": 11.033, "step": 8832 }, { "epoch": 93.0, - "grad_norm": 76.55709838867188, + "grad_norm": 0.9127195477485657, "learning_rate": 3.5000000000000004e-06, - "loss": 0.0402, + "loss": 0.0397, "step": 8928 }, { "epoch": 93.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8816568047337278, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9146341463414634, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045382946729660034, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9077490774907748, - "eval_overall_precision": 0.9044117647058824, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.5985, - "eval_samples_per_second": 284.026, - "eval_steps_per_second": 5.012, + "eval_loss": 0.04379863291978836, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.9223181257706535, + "eval_overall_precision": 0.9211822660098522, + "eval_overall_recall": 0.9234567901234568, + "eval_runtime": 0.2732, + "eval_samples_per_second": 622.226, + "eval_steps_per_second": 10.98, "step": 8928 }, { "epoch": 94.0, - "grad_norm": 0.24397915601730347, + "grad_norm": 1.1270931959152222, "learning_rate": 3e-06, - "loss": 0.0397, + "loss": 0.0414, "step": 9024 }, { "epoch": 94.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8764705882352941, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045313529670238495, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9066339066339066, - "eval_overall_precision": 0.902200488997555, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.5887, - "eval_samples_per_second": 288.767, - "eval_steps_per_second": 5.096, + "eval_loss": 0.043688420206308365, + "eval_overall_accuracy": 0.9870201601767468, + "eval_overall_f1": 0.9236453201970444, + "eval_overall_precision": 0.9213759213759214, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2716, + "eval_samples_per_second": 625.967, + "eval_steps_per_second": 11.046, "step": 9024 }, { "epoch": 95.0, - "grad_norm": 0.29766303300857544, + "grad_norm": 0.35525602102279663, "learning_rate": 2.5e-06, - "loss": 0.0409, + "loss": 0.0401, "step": 9120 }, { "epoch": 95.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_LOCATION_precision": 0.8818181818181818, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8816568047337278, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045457012951374054, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9077490774907748, - "eval_overall_precision": 0.9044117647058824, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.5948, - "eval_samples_per_second": 285.825, - "eval_steps_per_second": 5.044, + "eval_loss": 0.043763358145952225, + "eval_overall_accuracy": 0.9872963269814968, + "eval_overall_f1": 0.9247842170160296, + "eval_overall_precision": 0.9236453201970444, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2719, + "eval_samples_per_second": 625.255, + "eval_steps_per_second": 11.034, "step": 9120 }, { "epoch": 96.0, - "grad_norm": 1.5634249448776245, + "grad_norm": 0.21041779220104218, "learning_rate": 2.0000000000000003e-06, - "loss": 0.039, + "loss": 0.0415, "step": 9216 }, { "epoch": 96.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_LOCATION_precision": 0.8727272727272727, + "eval_LOCATION_recall": 0.9320388349514563, + "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8764705882352941, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9146341463414634, + "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.0454791858792305, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.9066339066339066, - "eval_overall_precision": 0.902200488997555, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.5934, - "eval_samples_per_second": 286.492, - "eval_steps_per_second": 5.056, + "eval_loss": 0.043890830129384995, + "eval_overall_accuracy": 0.9867439933719967, + "eval_overall_f1": 0.9209876543209876, + "eval_overall_precision": 0.9209876543209876, + "eval_overall_recall": 0.9209876543209876, + "eval_runtime": 0.2726, + "eval_samples_per_second": 623.718, + "eval_steps_per_second": 11.007, "step": 9216 }, { "epoch": 97.0, - "grad_norm": 0.9605298042297363, + "grad_norm": 0.8473381996154785, "learning_rate": 1.5e-06, - "loss": 0.0402, + "loss": 0.0404, "step": 9312 }, { "epoch": 97.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8739002932551321, + "eval_LOCATION_precision": 0.8818181818181818, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8764705882352941, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.0456123985350132, - "eval_overall_accuracy": 0.9845346589339962, - "eval_overall_f1": 0.9066339066339066, - "eval_overall_precision": 0.902200488997555, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.6745, - "eval_samples_per_second": 252.035, - "eval_steps_per_second": 4.448, + "eval_loss": 0.043740756809711456, + "eval_overall_accuracy": 0.9872963269814968, + "eval_overall_f1": 0.9247842170160296, + "eval_overall_precision": 0.9236453201970444, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2732, + "eval_samples_per_second": 622.321, + "eval_steps_per_second": 10.982, "step": 9312 }, { "epoch": 98.0, - "grad_norm": 1.7611069679260254, + "grad_norm": 1.3456790447235107, "learning_rate": 1.0000000000000002e-06, - "loss": 0.0382, + "loss": 0.0418, "step": 9408 }, { "epoch": 98.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8816568047337278, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04547755792737007, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9077490774907748, - "eval_overall_precision": 0.9044117647058824, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.5989, - "eval_samples_per_second": 283.847, - "eval_steps_per_second": 5.009, + "eval_loss": 0.04376491159200668, + "eval_overall_accuracy": 0.9870201601767468, + "eval_overall_f1": 0.9236453201970444, + "eval_overall_precision": 0.9213759213759214, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2717, + "eval_samples_per_second": 625.746, + "eval_steps_per_second": 11.043, "step": 9408 }, { "epoch": 99.0, - "grad_norm": 0.4898216426372528, + "grad_norm": 2.012141704559326, "learning_rate": 5.000000000000001e-07, - "loss": 0.0392, + "loss": 0.0388, "step": 9504 }, { "epoch": 99.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8816568047337278, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.04550032317638397, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9077490774907748, - "eval_overall_precision": 0.9044117647058824, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.5984, - "eval_samples_per_second": 284.085, - "eval_steps_per_second": 5.013, + "eval_loss": 0.043746430426836014, + "eval_overall_accuracy": 0.9870201601767468, + "eval_overall_f1": 0.9236453201970444, + "eval_overall_precision": 0.9213759213759214, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2735, + "eval_samples_per_second": 621.47, + "eval_steps_per_second": 10.967, "step": 9504 }, { "epoch": 100.0, - "grad_norm": 1.0116997957229614, + "grad_norm": 0.3253363370895386, "learning_rate": 0.0, - "loss": 0.0412, + "loss": 0.0397, "step": 9600 }, { "epoch": 100.0, - "eval_LOCATION_f1": 0.8815165876777251, + "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, - "eval_LOCATION_precision": 0.8611111111111112, - "eval_LOCATION_recall": 0.9029126213592233, - "eval_ORGANIZATION_f1": 0.8764705882352941, + "eval_LOCATION_precision": 0.8738738738738738, + "eval_LOCATION_recall": 0.941747572815534, + "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, - "eval_ORGANIZATION_precision": 0.8816568047337278, - "eval_ORGANIZATION_recall": 0.8713450292397661, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, - "eval_loss": 0.045390497893095016, - "eval_overall_accuracy": 0.9848108257387462, - "eval_overall_f1": 0.9077490774907748, - "eval_overall_precision": 0.9044117647058824, - "eval_overall_recall": 0.9111111111111111, - "eval_runtime": 0.6032, - "eval_samples_per_second": 281.85, - "eval_steps_per_second": 4.974, + "eval_loss": 0.04369127005338669, + "eval_overall_accuracy": 0.9870201601767468, + "eval_overall_f1": 0.9236453201970444, + "eval_overall_precision": 0.9213759213759214, + "eval_overall_recall": 0.9259259259259259, + "eval_runtime": 0.2733, + "eval_samples_per_second": 622.123, + "eval_steps_per_second": 10.979, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3889996718638620.0, - "train_loss": 0.09181262865662575, - "train_runtime": 1037.4922, - "train_samples_per_second": 147.567, - "train_steps_per_second": 9.253 + "train_loss": 0.09361960942546527, + "train_runtime": 547.4358, + "train_samples_per_second": 279.667, + "train_steps_per_second": 17.536 } ], "logging_steps": 500,