diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3930 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 10600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 0.9845661520957947, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.9292, + "step": 106 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.012903225806451613, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.3333333333333333, + "eval_PERSON_recall": 0.006578947368421052, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.0, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.0, + "eval_TIME_recall": 0.0, + "eval_loss": 0.6127339601516724, + "eval_overall_accuracy": 0.8399487836107554, + "eval_overall_f1": 0.005865102639296187, + "eval_overall_precision": 0.3333333333333333, + "eval_overall_recall": 0.0029585798816568047, + "eval_runtime": 0.7389, + "eval_samples_per_second": 253.078, + "eval_steps_per_second": 4.06, + "step": 106 + }, + { + "epoch": 2.0, + "grad_norm": 1.0367096662521362, + "learning_rate": 4.9e-05, + "loss": 0.5076, + "step": 212 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.3577235772357724, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.4074074074074074, + "eval_LOCATION_recall": 0.3188405797101449, + "eval_ORGANIZATION_f1": 0.11235955056179776, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.16129032258064516, + "eval_ORGANIZATION_recall": 0.08620689655172414, + "eval_PERSON_f1": 0.6703601108033241, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.5789473684210527, + "eval_PERSON_recall": 0.7960526315789473, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.4918032786885246, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.46875, + "eval_TIME_recall": 0.5172413793103449, + "eval_loss": 0.33459725975990295, + "eval_overall_accuracy": 0.9044814340588988, + "eval_overall_f1": 0.48082595870206496, + "eval_overall_precision": 0.47941176470588237, + "eval_overall_recall": 0.4822485207100592, + "eval_runtime": 0.7528, + "eval_samples_per_second": 248.397, + "eval_steps_per_second": 3.985, + "step": 212 + }, + { + "epoch": 3.0, + "grad_norm": 1.3818385601043701, + "learning_rate": 4.85e-05, + "loss": 0.2907, + "step": 318 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.6705882352941176, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.5643564356435643, + "eval_LOCATION_recall": 0.8260869565217391, + "eval_ORGANIZATION_f1": 0.39682539682539686, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.36764705882352944, + "eval_ORGANIZATION_recall": 0.43103448275862066, + "eval_PERSON_f1": 0.8085106382978723, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.751412429378531, + "eval_PERSON_recall": 0.875, + "eval_QUANTITY_f1": 0.605263157894737, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.5, + "eval_QUANTITY_recall": 0.7666666666666667, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1991610825061798, + "eval_overall_accuracy": 0.9323943661971831, + "eval_overall_f1": 0.6885676741130091, + "eval_overall_precision": 0.6193853427895981, + "eval_overall_recall": 0.7751479289940828, + "eval_runtime": 0.7451, + "eval_samples_per_second": 250.989, + "eval_steps_per_second": 4.027, + "step": 318 + }, + { + "epoch": 4.0, + "grad_norm": 2.382436513900757, + "learning_rate": 4.8e-05, + "loss": 0.1931, + "step": 424 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.7875, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6923076923076923, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.6323529411764706, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5512820512820513, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.7896253602305474, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.7025641025641025, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.6410256410256411, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.5208333333333334, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.6133333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.5, + "eval_TIME_recall": 0.7931034482758621, + "eval_loss": 0.190628781914711, + "eval_overall_accuracy": 0.9352112676056338, + "eval_overall_f1": 0.7311557788944725, + "eval_overall_precision": 0.6353711790393013, + "eval_overall_recall": 0.8609467455621301, + "eval_runtime": 0.7563, + "eval_samples_per_second": 247.259, + "eval_steps_per_second": 3.967, + "step": 424 + }, + { + "epoch": 5.0, + "grad_norm": 1.9061810970306396, + "learning_rate": 4.75e-05, + "loss": 0.1671, + "step": 530 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.7832167832167832, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7567567567567568, + "eval_LOCATION_recall": 0.8115942028985508, + "eval_ORGANIZATION_f1": 0.6714285714285714, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.573170731707317, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8535825545171339, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8106508875739645, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7826086956521738, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.9, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.13657993078231812, + "eval_overall_accuracy": 0.953393085787452, + "eval_overall_f1": 0.7978142076502731, + "eval_overall_precision": 0.7411167512690355, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.768, + "eval_samples_per_second": 243.498, + "eval_steps_per_second": 3.906, + "step": 530 + }, + { + "epoch": 6.0, + "grad_norm": 1.6724838018417358, + "learning_rate": 4.7e-05, + "loss": 0.1438, + "step": 636 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.7564102564102564, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6781609195402298, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6231884057971016, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5375, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8430769230769231, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.791907514450867, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7246376811594204, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6410256410256411, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.71875, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.6571428571428571, + "eval_TIME_recall": 0.7931034482758621, + "eval_loss": 0.13694226741790771, + "eval_overall_accuracy": 0.9528809218950064, + "eval_overall_f1": 0.7632978723404256, + "eval_overall_precision": 0.6932367149758454, + "eval_overall_recall": 0.849112426035503, + "eval_runtime": 0.846, + "eval_samples_per_second": 221.028, + "eval_steps_per_second": 3.546, + "step": 636 + }, + { + "epoch": 7.0, + "grad_norm": 0.49843528866767883, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.1337, + "step": 742 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.7712418300653594, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.855072463768116, + "eval_ORGANIZATION_f1": 0.6557377049180328, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.6896551724137931, + "eval_PERSON_f1": 0.8679245283018868, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8313253012048193, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.12538817524909973, + "eval_overall_accuracy": 0.9585147247119078, + "eval_overall_f1": 0.796116504854369, + "eval_overall_precision": 0.7493472584856397, + "eval_overall_recall": 0.849112426035503, + "eval_runtime": 0.8252, + "eval_samples_per_second": 226.607, + "eval_steps_per_second": 3.635, + "step": 742 + }, + { + "epoch": 8.0, + "grad_norm": 0.4025654196739197, + "learning_rate": 4.600000000000001e-05, + "loss": 0.1244, + "step": 848 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.8275862068965517, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7894736842105263, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.6825396825396826, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6323529411764706, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8769716088328074, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8424242424242424, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.6875, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6470588235294118, + "eval_QUANTITY_recall": 0.7333333333333333, + "eval_TIME_f1": 0.7540983606557378, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.71875, + "eval_TIME_recall": 0.7931034482758621, + "eval_loss": 0.12903648614883423, + "eval_overall_accuracy": 0.9572343149807938, + "eval_overall_f1": 0.8050490883590464, + "eval_overall_precision": 0.7653333333333333, + "eval_overall_recall": 0.849112426035503, + "eval_runtime": 0.8305, + "eval_samples_per_second": 225.153, + "eval_steps_per_second": 3.612, + "step": 848 + }, + { + "epoch": 9.0, + "grad_norm": 1.2503007650375366, + "learning_rate": 4.55e-05, + "loss": 0.1195, + "step": 954 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.8243243243243245, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7721518987341772, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6285714285714286, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8598130841121495, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8165680473372781, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8064516129032258, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7575757575757576, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.12411422282457352, + "eval_overall_accuracy": 0.9592829705505762, + "eval_overall_f1": 0.8093922651933702, + "eval_overall_precision": 0.7590673575129534, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8548, + "eval_samples_per_second": 218.761, + "eval_steps_per_second": 3.51, + "step": 954 + }, + { + "epoch": 10.0, + "grad_norm": 1.5948196649551392, + "learning_rate": 4.5e-05, + "loss": 0.1083, + "step": 1060 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.7973856209150327, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7261904761904762, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6285714285714286, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8761904761904762, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7575757575757577, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.12625408172607422, + "eval_overall_accuracy": 0.9592829705505762, + "eval_overall_f1": 0.8099861303744799, + "eval_overall_precision": 0.762402088772846, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.9354, + "eval_samples_per_second": 199.908, + "eval_steps_per_second": 3.207, + "step": 1060 + }, + { + "epoch": 11.0, + "grad_norm": 0.7792600393295288, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.1025, + "step": 1166 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7441860465116279, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7040000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6567164179104478, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8580246913580247, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.64, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.5333333333333333, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7741935483870968, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7272727272727273, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.14459605515003204, + "eval_overall_accuracy": 0.9551856594110115, + "eval_overall_f1": 0.8071135430916553, + "eval_overall_precision": 0.7506361323155216, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.9057, + "eval_samples_per_second": 206.46, + "eval_steps_per_second": 3.312, + "step": 1166 + }, + { + "epoch": 12.0, + "grad_norm": 1.0304011106491089, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.0969, + "step": 1272 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.7973856209150327, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7261904761904762, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6991869918699186, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6615384615384615, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8789808917197451, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1326281726360321, + "eval_overall_accuracy": 0.9595390524967989, + "eval_overall_f1": 0.8111888111888113, + "eval_overall_precision": 0.7692307692307693, + "eval_overall_recall": 0.8579881656804734, + "eval_runtime": 0.875, + "eval_samples_per_second": 213.704, + "eval_steps_per_second": 3.428, + "step": 1272 + }, + { + "epoch": 13.0, + "grad_norm": 0.34425103664398193, + "learning_rate": 4.35e-05, + "loss": 0.0885, + "step": 1378 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.821917808219178, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7792207792207793, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7040000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6567164179104478, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.888888888888889, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8588957055214724, + "eval_PERSON_recall": 0.9210526315789473, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.11661401391029358, + "eval_overall_accuracy": 0.9626120358514725, + "eval_overall_f1": 0.8316831683168318, + "eval_overall_precision": 0.7967479674796748, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.8754, + "eval_samples_per_second": 213.615, + "eval_steps_per_second": 3.427, + "step": 1378 + }, + { + "epoch": 14.0, + "grad_norm": 1.0210072994232178, + "learning_rate": 4.3e-05, + "loss": 0.0897, + "step": 1484 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.8322147651006713, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.775, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.6825396825396826, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6323529411764706, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8734177215189874, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7272727272727273, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.6486486486486487, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1263030618429184, + "eval_overall_accuracy": 0.9603072983354674, + "eval_overall_f1": 0.8099861303744799, + "eval_overall_precision": 0.762402088772846, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.9149, + "eval_samples_per_second": 204.387, + "eval_steps_per_second": 3.279, + "step": 1484 + }, + { + "epoch": 15.0, + "grad_norm": 0.7929083704948425, + "learning_rate": 4.25e-05, + "loss": 0.0847, + "step": 1590 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.6721311475409836, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.640625, + "eval_ORGANIZATION_recall": 0.7068965517241379, + "eval_PERSON_f1": 0.8789808917197451, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7058823529411765, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.631578947368421, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.75, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.6857142857142857, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.12834492325782776, + "eval_overall_accuracy": 0.9592829705505762, + "eval_overall_f1": 0.8123249299719889, + "eval_overall_precision": 0.7712765957446809, + "eval_overall_recall": 0.8579881656804734, + "eval_runtime": 0.8739, + "eval_samples_per_second": 213.989, + "eval_steps_per_second": 3.433, + "step": 1590 + }, + { + "epoch": 16.0, + "grad_norm": 0.8108320236206055, + "learning_rate": 4.2e-05, + "loss": 0.0808, + "step": 1696 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.8299319727891156, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.782051282051282, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7040000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6567164179104478, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8874598070739551, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8679245283018868, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.761904761904762, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7272727272727273, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.12765684723854065, + "eval_overall_accuracy": 0.9623559539052496, + "eval_overall_f1": 0.826025459688826, + "eval_overall_precision": 0.7913279132791328, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.8873, + "eval_samples_per_second": 210.758, + "eval_steps_per_second": 3.381, + "step": 1696 + }, + { + "epoch": 17.0, + "grad_norm": 0.31881314516067505, + "learning_rate": 4.15e-05, + "loss": 0.0746, + "step": 1802 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.8266666666666668, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7654320987654321, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7086614173228347, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6521739130434783, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8874598070739551, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8679245283018868, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.13274899125099182, + "eval_overall_accuracy": 0.9592829705505762, + "eval_overall_f1": 0.8218793828892006, + "eval_overall_precision": 0.7813333333333333, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8878, + "eval_samples_per_second": 210.626, + "eval_steps_per_second": 3.379, + "step": 1802 + }, + { + "epoch": 18.0, + "grad_norm": 1.1737678050994873, + "learning_rate": 4.1e-05, + "loss": 0.0715, + "step": 1908 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.8671328671328672, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8378378378378378, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.688, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6417910447761194, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8726114649681528, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.845679012345679, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1314823478460312, + "eval_overall_accuracy": 0.9595390524967989, + "eval_overall_f1": 0.8215297450424929, + "eval_overall_precision": 0.7880434782608695, + "eval_overall_recall": 0.8579881656804734, + "eval_runtime": 0.8811, + "eval_samples_per_second": 212.231, + "eval_steps_per_second": 3.405, + "step": 1908 + }, + { + "epoch": 19.0, + "grad_norm": 0.9921818971633911, + "learning_rate": 4.05e-05, + "loss": 0.0695, + "step": 2014 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.8378378378378378, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7848101265822784, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7213114754098361, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8753993610223643, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8509316770186336, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7164179104477612, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6486486486486487, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7936507936507937, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7352941176470589, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1310775727033615, + "eval_overall_accuracy": 0.9582586427656851, + "eval_overall_f1": 0.8190743338008415, + "eval_overall_precision": 0.7786666666666666, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.8843, + "eval_samples_per_second": 211.462, + "eval_steps_per_second": 3.392, + "step": 2014 + }, + { + "epoch": 20.0, + "grad_norm": 0.8729901909828186, + "learning_rate": 4e-05, + "loss": 0.0705, + "step": 2120 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.851063829787234, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8333333333333334, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.688, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6417910447761194, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8761904761904762, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.13000606000423431, + "eval_overall_accuracy": 0.9597951344430218, + "eval_overall_f1": 0.8238636363636365, + "eval_overall_precision": 0.7923497267759563, + "eval_overall_recall": 0.8579881656804734, + "eval_runtime": 0.87, + "eval_samples_per_second": 214.936, + "eval_steps_per_second": 3.448, + "step": 2120 + }, + { + "epoch": 21.0, + "grad_norm": 0.5570505857467651, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.0623, + "step": 2226 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.8590604026845639, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7076923076923076, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6388888888888888, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.9061488673139159, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.89171974522293, + "eval_PERSON_recall": 0.9210526315789473, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7741935483870968, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7272727272727273, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.14154018461704254, + "eval_overall_accuracy": 0.9608194622279129, + "eval_overall_f1": 0.8347338935574229, + "eval_overall_precision": 0.7925531914893617, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.892, + "eval_samples_per_second": 209.65, + "eval_steps_per_second": 3.363, + "step": 2226 + }, + { + "epoch": 22.0, + "grad_norm": 0.4297601878643036, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.0636, + "step": 2332 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.8366013071895425, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7619047619047619, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.706766917293233, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6266666666666667, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8761904761904762, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7812500000000001, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7142857142857143, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.14867202937602997, + "eval_overall_accuracy": 0.9590268886043534, + "eval_overall_f1": 0.8175582990397806, + "eval_overall_precision": 0.7621483375959079, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.8724, + "eval_samples_per_second": 214.362, + "eval_steps_per_second": 3.439, + "step": 2332 + }, + { + "epoch": 23.0, + "grad_norm": 0.4904947876930237, + "learning_rate": 3.85e-05, + "loss": 0.0571, + "step": 2438 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.8591549295774648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8356164383561644, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7131782945736435, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.647887323943662, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8774193548387097, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8607594936708861, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1371629238128662, + "eval_overall_accuracy": 0.9603072983354674, + "eval_overall_f1": 0.8283687943262411, + "eval_overall_precision": 0.7956403269754768, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.8758, + "eval_samples_per_second": 213.508, + "eval_steps_per_second": 3.425, + "step": 2438 + }, + { + "epoch": 24.0, + "grad_norm": 0.8735907673835754, + "learning_rate": 3.8e-05, + "loss": 0.0574, + "step": 2544 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.8551724137931034, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8157894736842105, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.696969696969697, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6216216216216216, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8881789137380192, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8275862068965517, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.14183317124843597, + "eval_overall_accuracy": 0.9626120358514725, + "eval_overall_f1": 0.8286516853932584, + "eval_overall_precision": 0.7887700534759359, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.876, + "eval_samples_per_second": 213.47, + "eval_steps_per_second": 3.425, + "step": 2544 + }, + { + "epoch": 25.0, + "grad_norm": 0.4508860409259796, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0542, + "step": 2650 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.8671328671328672, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8378378378378378, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7272727272727273, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6984126984126984, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8817891373801917, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8571428571428571, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.13426989316940308, + "eval_overall_accuracy": 0.9633802816901409, + "eval_overall_f1": 0.84, + "eval_overall_precision": 0.8121546961325967, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.8752, + "eval_samples_per_second": 213.654, + "eval_steps_per_second": 3.428, + "step": 2650 + }, + { + "epoch": 26.0, + "grad_norm": 0.8999614715576172, + "learning_rate": 3.7e-05, + "loss": 0.0526, + "step": 2756 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.8652482269503546, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8472222222222222, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7540983606557378, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.71875, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8789808917197451, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.761904761904762, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7272727272727273, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1440158635377884, + "eval_overall_accuracy": 0.963124199743918, + "eval_overall_f1": 0.8412017167381973, + "eval_overall_precision": 0.814404432132964, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.8876, + "eval_samples_per_second": 210.689, + "eval_steps_per_second": 3.38, + "step": 2756 + }, + { + "epoch": 27.0, + "grad_norm": 0.6169599890708923, + "learning_rate": 3.65e-05, + "loss": 0.0477, + "step": 2862 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7142857142857142, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6617647058823529, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8662420382165605, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8395061728395061, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.16048064827919006, + "eval_overall_accuracy": 0.9592829705505762, + "eval_overall_f1": 0.8241912798874824, + "eval_overall_precision": 0.7855227882037533, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8784, + "eval_samples_per_second": 212.876, + "eval_steps_per_second": 3.415, + "step": 2862 + }, + { + "epoch": 28.0, + "grad_norm": 0.5325952768325806, + "learning_rate": 3.6e-05, + "loss": 0.048, + "step": 2968 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.8671328671328672, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8378378378378378, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7086614173228347, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6521739130434783, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1558757722377777, + "eval_overall_accuracy": 0.9580025608194622, + "eval_overall_f1": 0.830028328611898, + "eval_overall_precision": 0.7961956521739131, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8788, + "eval_samples_per_second": 212.794, + "eval_steps_per_second": 3.414, + "step": 2968 + }, + { + "epoch": 29.0, + "grad_norm": 0.5985626578330994, + "learning_rate": 3.55e-05, + "loss": 0.0481, + "step": 3074 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7007299270072993, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6075949367088608, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.15555179119110107, + "eval_overall_accuracy": 0.9595390524967989, + "eval_overall_f1": 0.8282548476454293, + "eval_overall_precision": 0.7786458333333334, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.8856, + "eval_samples_per_second": 211.154, + "eval_steps_per_second": 3.387, + "step": 3074 + }, + { + "epoch": 30.0, + "grad_norm": 0.8177056908607483, + "learning_rate": 3.5e-05, + "loss": 0.045, + "step": 3180 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7068965517241379, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7068965517241379, + "eval_ORGANIZATION_recall": 0.7068965517241379, + "eval_PERSON_f1": 0.8789808917197451, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.761904761904762, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7272727272727273, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1506585329771042, + "eval_overall_accuracy": 0.9620998719590269, + "eval_overall_f1": 0.8342857142857144, + "eval_overall_precision": 0.8066298342541437, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.8833, + "eval_samples_per_second": 211.711, + "eval_steps_per_second": 3.396, + "step": 3180 + }, + { + "epoch": 31.0, + "grad_norm": 0.42287537455558777, + "learning_rate": 3.45e-05, + "loss": 0.0445, + "step": 3286 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.8590604026845639, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7343749999999999, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6714285714285714, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8874598070739551, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8679245283018868, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7868852459016394, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.15119028091430664, + "eval_overall_accuracy": 0.9618437900128041, + "eval_overall_f1": 0.8330995792426368, + "eval_overall_precision": 0.792, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.8769, + "eval_samples_per_second": 213.246, + "eval_steps_per_second": 3.421, + "step": 3286 + }, + { + "epoch": 32.0, + "grad_norm": 0.3716237246990204, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.042, + "step": 3392 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.716417910447761, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.631578947368421, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.877742946708464, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8383233532934131, + "eval_PERSON_recall": 0.9210526315789473, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8064516129032258, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7575757575757576, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1590593457221985, + "eval_overall_accuracy": 0.9595390524967989, + "eval_overall_f1": 0.8303448275862069, + "eval_overall_precision": 0.7777777777777778, + "eval_overall_recall": 0.8905325443786982, + "eval_runtime": 0.8744, + "eval_samples_per_second": 213.872, + "eval_steps_per_second": 3.431, + "step": 3392 + }, + { + "epoch": 33.0, + "grad_norm": 0.18944571912288666, + "learning_rate": 3.35e-05, + "loss": 0.0394, + "step": 3498 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.8611111111111112, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8266666666666667, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7040000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6567164179104478, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8580246913580247, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7868852459016394, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1586446911096573, + "eval_overall_accuracy": 0.9603072983354674, + "eval_overall_f1": 0.8276836158192091, + "eval_overall_precision": 0.7918918918918919, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8776, + "eval_samples_per_second": 213.07, + "eval_steps_per_second": 3.418, + "step": 3498 + }, + { + "epoch": 34.0, + "grad_norm": 1.3505786657333374, + "learning_rate": 3.3e-05, + "loss": 0.0378, + "step": 3604 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7131782945736435, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.647887323943662, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8789808917197451, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.15407726168632507, + "eval_overall_accuracy": 0.9618437900128041, + "eval_overall_f1": 0.8291316526610645, + "eval_overall_precision": 0.7872340425531915, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.8762, + "eval_samples_per_second": 213.42, + "eval_steps_per_second": 3.424, + "step": 3604 + }, + { + "epoch": 35.0, + "grad_norm": 0.631050705909729, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0375, + "step": 3710 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.6725663716814159, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6909090909090909, + "eval_ORGANIZATION_recall": 0.6551724137931034, + "eval_PERSON_f1": 0.8945686900958467, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8695652173913043, + "eval_PERSON_recall": 0.9210526315789473, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.15462984144687653, + "eval_overall_accuracy": 0.9626120358514725, + "eval_overall_f1": 0.8309455587392551, + "eval_overall_precision": 0.8055555555555556, + "eval_overall_recall": 0.8579881656804734, + "eval_runtime": 0.8693, + "eval_samples_per_second": 215.126, + "eval_steps_per_second": 3.451, + "step": 3710 + }, + { + "epoch": 36.0, + "grad_norm": 0.13165172934532166, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.0384, + "step": 3816 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.8749999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.84, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.71875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.896774193548387, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.879746835443038, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.761904761904762, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7272727272727273, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.16041359305381775, + "eval_overall_accuracy": 0.9623559539052496, + "eval_overall_f1": 0.84375, + "eval_overall_precision": 0.8114754098360656, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.8806, + "eval_samples_per_second": 212.356, + "eval_steps_per_second": 3.407, + "step": 3816 + }, + { + "epoch": 37.0, + "grad_norm": 0.39163538813591003, + "learning_rate": 3.15e-05, + "loss": 0.0337, + "step": 3922 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7166666666666668, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6935483870967742, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8789808917197451, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.896551724137931, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.896551724137931, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.16409936547279358, + "eval_overall_accuracy": 0.9623559539052496, + "eval_overall_f1": 0.8421052631578948, + "eval_overall_precision": 0.810958904109589, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.8782, + "eval_samples_per_second": 212.924, + "eval_steps_per_second": 3.416, + "step": 3922 + }, + { + "epoch": 38.0, + "grad_norm": 1.8893556594848633, + "learning_rate": 3.1e-05, + "loss": 0.0327, + "step": 4028 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7258064516129032, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6818181818181818, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8616352201257862, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7868852459016394, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1764567792415619, + "eval_overall_accuracy": 0.9600512163892445, + "eval_overall_f1": 0.8316831683168318, + "eval_overall_precision": 0.7967479674796748, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.8812, + "eval_samples_per_second": 212.213, + "eval_steps_per_second": 3.404, + "step": 4028 + }, + { + "epoch": 39.0, + "grad_norm": 1.0850064754486084, + "learning_rate": 3.05e-05, + "loss": 0.033, + "step": 4134 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.8611111111111112, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8266666666666667, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7258064516129032, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6818181818181818, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8616352201257862, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7868852459016394, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.16998325288295746, + "eval_overall_accuracy": 0.9600512163892445, + "eval_overall_f1": 0.8323863636363636, + "eval_overall_precision": 0.8005464480874317, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8888, + "eval_samples_per_second": 210.391, + "eval_steps_per_second": 3.375, + "step": 4134 + }, + { + "epoch": 40.0, + "grad_norm": 0.8192635774612427, + "learning_rate": 3e-05, + "loss": 0.0333, + "step": 4240 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.8493150684931507, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8051948051948052, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7200000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6716417910447762, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8616352201257862, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.17454804480075836, + "eval_overall_accuracy": 0.9600512163892445, + "eval_overall_f1": 0.8312056737588652, + "eval_overall_precision": 0.7983651226158038, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8709, + "eval_samples_per_second": 214.732, + "eval_steps_per_second": 3.445, + "step": 4240 + }, + { + "epoch": 41.0, + "grad_norm": 1.14230477809906, + "learning_rate": 2.95e-05, + "loss": 0.0296, + "step": 4346 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.851063829787234, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8333333333333334, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.7086614173228347, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6521739130434783, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8662420382165605, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8395061728395061, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.17790456116199493, + "eval_overall_accuracy": 0.9613316261203585, + "eval_overall_f1": 0.8283687943262411, + "eval_overall_precision": 0.7956403269754768, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.8959, + "eval_samples_per_second": 208.739, + "eval_steps_per_second": 3.349, + "step": 4346 + }, + { + "epoch": 42.0, + "grad_norm": 1.1531621217727661, + "learning_rate": 2.9e-05, + "loss": 0.0305, + "step": 4452 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.851063829787234, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8333333333333334, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.6666666666666667, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6176470588235294, + "eval_ORGANIZATION_recall": 0.7241379310344828, + "eval_PERSON_f1": 0.8874598070739551, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8679245283018868, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1733568161725998, + "eval_overall_accuracy": 0.9618437900128041, + "eval_overall_f1": 0.8297567954220315, + "eval_overall_precision": 0.8033240997229917, + "eval_overall_recall": 0.8579881656804734, + "eval_runtime": 0.8898, + "eval_samples_per_second": 210.16, + "eval_steps_per_second": 3.372, + "step": 4452 + }, + { + "epoch": 43.0, + "grad_norm": 0.12765854597091675, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.029, + "step": 4558 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.8591549295774648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8356164383561644, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7401574803149606, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6811594202898551, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8782051282051281, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.85625, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.18485189974308014, + "eval_overall_accuracy": 0.9610755441741358, + "eval_overall_f1": 0.8368794326241134, + "eval_overall_precision": 0.8038147138964578, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.8698, + "eval_samples_per_second": 214.991, + "eval_steps_per_second": 3.449, + "step": 4558 + }, + { + "epoch": 44.0, + "grad_norm": 0.6099337935447693, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.0289, + "step": 4664 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.8689655172413793, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8289473684210527, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.6942148760330579, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7241379310344828, + "eval_PERSON_f1": 0.8745980707395499, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8553459119496856, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.170259490609169, + "eval_overall_accuracy": 0.9610755441741358, + "eval_overall_f1": 0.833810888252149, + "eval_overall_precision": 0.8083333333333333, + "eval_overall_recall": 0.8609467455621301, + "eval_runtime": 0.8888, + "eval_samples_per_second": 210.406, + "eval_steps_per_second": 3.375, + "step": 4664 + }, + { + "epoch": 45.0, + "grad_norm": 1.0691465139389038, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0294, + "step": 4770 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.8413793103448277, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8026315789473685, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.7540983606557378, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.71875, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8782051282051281, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.85625, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1816970556974411, + "eval_overall_accuracy": 0.9610755441741358, + "eval_overall_f1": 0.8352272727272727, + "eval_overall_precision": 0.8032786885245902, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.8854, + "eval_samples_per_second": 211.192, + "eval_steps_per_second": 3.388, + "step": 4770 + }, + { + "epoch": 46.0, + "grad_norm": 0.2375423014163971, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.0262, + "step": 4876 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7213114754098361, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8874598070739551, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8679245283018868, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1889667958021164, + "eval_overall_accuracy": 0.9626120358514725, + "eval_overall_f1": 0.8392603129445234, + "eval_overall_precision": 0.8082191780821918, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.8885, + "eval_samples_per_second": 210.463, + "eval_steps_per_second": 3.376, + "step": 4876 + }, + { + "epoch": 47.0, + "grad_norm": 0.48538097739219666, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0261, + "step": 4982 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.8611111111111112, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8266666666666667, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7438016528925621, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7142857142857143, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8838709677419354, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8670886075949367, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.18387795984745026, + "eval_overall_accuracy": 0.9626120358514725, + "eval_overall_f1": 0.8464849354375897, + "eval_overall_precision": 0.8217270194986073, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.8841, + "eval_samples_per_second": 211.519, + "eval_steps_per_second": 3.393, + "step": 4982 + }, + { + "epoch": 48.0, + "grad_norm": 1.2411327362060547, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.027, + "step": 5088 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7343749999999999, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6714285714285714, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8838709677419354, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8670886075949367, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.18413321673870087, + "eval_overall_accuracy": 0.9615877080665813, + "eval_overall_f1": 0.8389830508474576, + "eval_overall_precision": 0.8027027027027027, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.8687, + "eval_samples_per_second": 215.275, + "eval_steps_per_second": 3.454, + "step": 5088 + }, + { + "epoch": 49.0, + "grad_norm": 1.1320481300354004, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.0243, + "step": 5194 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.8652482269503546, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8472222222222222, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.717948717948718, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.711864406779661, + "eval_ORGANIZATION_recall": 0.7241379310344828, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8616352201257862, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.18021155893802643, + "eval_overall_accuracy": 0.9628681177976952, + "eval_overall_f1": 0.8405797101449276, + "eval_overall_precision": 0.8238636363636364, + "eval_overall_recall": 0.8579881656804734, + "eval_runtime": 0.8873, + "eval_samples_per_second": 210.759, + "eval_steps_per_second": 3.381, + "step": 5194 + }, + { + "epoch": 50.0, + "grad_norm": 0.6338851451873779, + "learning_rate": 2.5e-05, + "loss": 0.0243, + "step": 5300 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7040000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6567164179104478, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1869899332523346, + "eval_overall_accuracy": 0.9613316261203585, + "eval_overall_f1": 0.8345120226308346, + "eval_overall_precision": 0.7994579945799458, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.8731, + "eval_samples_per_second": 214.182, + "eval_steps_per_second": 3.436, + "step": 5300 + }, + { + "epoch": 51.0, + "grad_norm": 2.7834653854370117, + "learning_rate": 2.45e-05, + "loss": 0.0256, + "step": 5406 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.8611111111111112, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8266666666666667, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7022900763358779, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6301369863013698, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8838709677419354, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8670886075949367, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.19200466573238373, + "eval_overall_accuracy": 0.9613316261203585, + "eval_overall_f1": 0.8356940509915014, + "eval_overall_precision": 0.8016304347826086, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.8812, + "eval_samples_per_second": 212.219, + "eval_steps_per_second": 3.405, + "step": 5406 + }, + { + "epoch": 52.0, + "grad_norm": 0.22858576476573944, + "learning_rate": 2.4e-05, + "loss": 0.0233, + "step": 5512 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.8611111111111112, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8266666666666667, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.716417910447761, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.631578947368421, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8789808917197451, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7931034482758621, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7931034482758621, + "eval_TIME_recall": 0.7931034482758621, + "eval_loss": 0.19999483227729797, + "eval_overall_accuracy": 0.9590268886043534, + "eval_overall_f1": 0.8302945301542777, + "eval_overall_precision": 0.7893333333333333, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.8802, + "eval_samples_per_second": 212.441, + "eval_steps_per_second": 3.408, + "step": 5512 + }, + { + "epoch": 53.0, + "grad_norm": 1.8902926445007324, + "learning_rate": 2.35e-05, + "loss": 0.0233, + "step": 5618 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.853146853146853, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8243243243243243, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6923076923076923, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.19701984524726868, + "eval_overall_accuracy": 0.9608194622279129, + "eval_overall_f1": 0.8265162200282087, + "eval_overall_precision": 0.7897574123989218, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8753, + "eval_samples_per_second": 213.637, + "eval_steps_per_second": 3.427, + "step": 5618 + }, + { + "epoch": 54.0, + "grad_norm": 0.5953955054283142, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0211, + "step": 5724 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7200000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6716417910447762, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.8064516129032259, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.78125, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.21723441779613495, + "eval_overall_accuracy": 0.9595390524967989, + "eval_overall_f1": 0.8293370944992947, + "eval_overall_precision": 0.7924528301886793, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.8826, + "eval_samples_per_second": 211.865, + "eval_steps_per_second": 3.399, + "step": 5724 + }, + { + "epoch": 55.0, + "grad_norm": 2.6492440700531006, + "learning_rate": 2.25e-05, + "loss": 0.0217, + "step": 5830 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.8551724137931034, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8157894736842105, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.6507936507936507, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6029411764705882, + "eval_ORGANIZATION_recall": 0.7068965517241379, + "eval_PERSON_f1": 0.8838709677419354, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8670886075949367, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.8064516129032259, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.78125, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.19644935429096222, + "eval_overall_accuracy": 0.9608194622279129, + "eval_overall_f1": 0.829059829059829, + "eval_overall_precision": 0.7994505494505495, + "eval_overall_recall": 0.8609467455621301, + "eval_runtime": 0.8838, + "eval_samples_per_second": 211.588, + "eval_steps_per_second": 3.394, + "step": 5830 + }, + { + "epoch": 56.0, + "grad_norm": 0.8844048380851746, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0214, + "step": 5936 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.8749999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.84, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7096774193548386, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8817891373801917, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8571428571428571, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.8064516129032259, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.78125, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.20219193398952484, + "eval_overall_accuracy": 0.9620998719590269, + "eval_overall_f1": 0.8404558404558404, + "eval_overall_precision": 0.8104395604395604, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.8832, + "eval_samples_per_second": 211.741, + "eval_steps_per_second": 3.397, + "step": 5936 + }, + { + "epoch": 57.0, + "grad_norm": 0.3619399666786194, + "learning_rate": 2.15e-05, + "loss": 0.0197, + "step": 6042 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.8493150684931507, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8051948051948052, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7049180327868853, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.671875, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8903225806451613, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8734177215189873, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.20077820122241974, + "eval_overall_accuracy": 0.9608194622279129, + "eval_overall_f1": 0.8330955777460769, + "eval_overall_precision": 0.8044077134986226, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.9531, + "eval_samples_per_second": 196.195, + "eval_steps_per_second": 3.148, + "step": 6042 + }, + { + "epoch": 58.0, + "grad_norm": 0.1735541820526123, + "learning_rate": 2.1e-05, + "loss": 0.0216, + "step": 6148 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.8551724137931034, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8157894736842105, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7131782945736435, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.647887323943662, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8782051282051281, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.85625, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.21140815317630768, + "eval_overall_accuracy": 0.9597951344430218, + "eval_overall_f1": 0.8333333333333334, + "eval_overall_precision": 0.7972972972972973, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.8863, + "eval_samples_per_second": 210.995, + "eval_steps_per_second": 3.385, + "step": 6148 + }, + { + "epoch": 59.0, + "grad_norm": 0.837820291519165, + "learning_rate": 2.05e-05, + "loss": 0.0197, + "step": 6254 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7244094488188977, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.20840610563755035, + "eval_overall_accuracy": 0.9600512163892445, + "eval_overall_f1": 0.8389830508474576, + "eval_overall_precision": 0.8027027027027027, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.8868, + "eval_samples_per_second": 210.865, + "eval_steps_per_second": 3.383, + "step": 6254 + }, + { + "epoch": 60.0, + "grad_norm": 0.5633171796798706, + "learning_rate": 2e-05, + "loss": 0.0182, + "step": 6360 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7286821705426356, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6619718309859155, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8726114649681528, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.845679012345679, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.21587228775024414, + "eval_overall_accuracy": 0.9603072983354674, + "eval_overall_f1": 0.8370786516853933, + "eval_overall_precision": 0.7967914438502673, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.8748, + "eval_samples_per_second": 213.775, + "eval_steps_per_second": 3.43, + "step": 6360 + }, + { + "epoch": 61.0, + "grad_norm": 0.4832909405231476, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.0196, + "step": 6466 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7200000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6716417910447762, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.20733201503753662, + "eval_overall_accuracy": 0.9615877080665813, + "eval_overall_f1": 0.8425531914893617, + "eval_overall_precision": 0.8092643051771117, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.9535, + "eval_samples_per_second": 196.123, + "eval_steps_per_second": 3.146, + "step": 6466 + }, + { + "epoch": 62.0, + "grad_norm": 0.6487990617752075, + "learning_rate": 1.9e-05, + "loss": 0.0176, + "step": 6572 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.8671328671328672, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8378378378378378, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7213114754098361, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8769716088328074, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8424242424242424, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7931034482758621, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7931034482758621, + "eval_TIME_recall": 0.7931034482758621, + "eval_loss": 0.20125307142734528, + "eval_overall_accuracy": 0.9605633802816902, + "eval_overall_f1": 0.8335704125177809, + "eval_overall_precision": 0.8027397260273973, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8874, + "eval_samples_per_second": 210.718, + "eval_steps_per_second": 3.38, + "step": 6572 + }, + { + "epoch": 63.0, + "grad_norm": 0.8727775812149048, + "learning_rate": 1.85e-05, + "loss": 0.0157, + "step": 6678 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.8356164383561644, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7922077922077922, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6833333333333333, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6612903225806451, + "eval_ORGANIZATION_recall": 0.7068965517241379, + "eval_PERSON_f1": 0.8838709677419354, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8670886075949367, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.8064516129032259, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.78125, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.20443986356258392, + "eval_overall_accuracy": 0.9600512163892445, + "eval_overall_f1": 0.828080229226361, + "eval_overall_precision": 0.8027777777777778, + "eval_overall_recall": 0.8550295857988166, + "eval_runtime": 0.8782, + "eval_samples_per_second": 212.933, + "eval_steps_per_second": 3.416, + "step": 6678 + }, + { + "epoch": 64.0, + "grad_norm": 1.5122424364089966, + "learning_rate": 1.8e-05, + "loss": 0.0177, + "step": 6784 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.8476821192052981, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7175572519083969, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6438356164383562, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8652037617554859, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8263473053892215, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.22791583836078644, + "eval_overall_accuracy": 0.9590268886043534, + "eval_overall_f1": 0.8298755186721992, + "eval_overall_precision": 0.7792207792207793, + "eval_overall_recall": 0.8875739644970414, + "eval_runtime": 0.8723, + "eval_samples_per_second": 214.372, + "eval_steps_per_second": 3.439, + "step": 6784 + }, + { + "epoch": 65.0, + "grad_norm": 1.5700312852859497, + "learning_rate": 1.75e-05, + "loss": 0.0165, + "step": 6890 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.8493150684931507, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8051948051948052, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7301587301587301, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6764705882352942, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8404907975460123, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.761904761904762, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7272727272727273, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.21637500822544098, + "eval_overall_accuracy": 0.9585147247119078, + "eval_overall_f1": 0.8265162200282087, + "eval_overall_precision": 0.7897574123989218, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.8725, + "eval_samples_per_second": 214.338, + "eval_steps_per_second": 3.439, + "step": 6890 + }, + { + "epoch": 66.0, + "grad_norm": 0.9781705737113953, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0171, + "step": 6996 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.746031746031746, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6911764705882353, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8734177215189874, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.21887072920799255, + "eval_overall_accuracy": 0.9620998719590269, + "eval_overall_f1": 0.8426966292134831, + "eval_overall_precision": 0.8021390374331551, + "eval_overall_recall": 0.8875739644970414, + "eval_runtime": 0.8649, + "eval_samples_per_second": 216.219, + "eval_steps_per_second": 3.469, + "step": 6996 + }, + { + "epoch": 67.0, + "grad_norm": 0.7777842879295349, + "learning_rate": 1.65e-05, + "loss": 0.0167, + "step": 7102 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.8590604026845639, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7286821705426356, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6619718309859155, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8832807570977917, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8484848484848485, + "eval_PERSON_recall": 0.9210526315789473, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7812500000000001, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7142857142857143, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.23059594631195068, + "eval_overall_accuracy": 0.9597951344430218, + "eval_overall_f1": 0.8337950138504154, + "eval_overall_precision": 0.7838541666666666, + "eval_overall_recall": 0.8905325443786982, + "eval_runtime": 0.8828, + "eval_samples_per_second": 211.834, + "eval_steps_per_second": 3.398, + "step": 7102 + }, + { + "epoch": 68.0, + "grad_norm": 0.42934471368789673, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0156, + "step": 7208 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7580645161290323, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7121212121212122, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8580246913580247, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.8064516129032259, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.78125, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.21977542340755463, + "eval_overall_accuracy": 0.9623559539052496, + "eval_overall_f1": 0.8498583569405098, + "eval_overall_precision": 0.8152173913043478, + "eval_overall_recall": 0.8875739644970414, + "eval_runtime": 0.8618, + "eval_samples_per_second": 216.978, + "eval_steps_per_second": 3.481, + "step": 7208 + }, + { + "epoch": 69.0, + "grad_norm": 1.0344369411468506, + "learning_rate": 1.55e-05, + "loss": 0.0156, + "step": 7314 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.8590604026845639, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7175572519083969, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6438356164383562, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8903225806451613, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8734177215189873, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.21617823839187622, + "eval_overall_accuracy": 0.9615877080665813, + "eval_overall_f1": 0.8398876404494382, + "eval_overall_precision": 0.7994652406417112, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.8721, + "eval_samples_per_second": 214.436, + "eval_steps_per_second": 3.44, + "step": 7314 + }, + { + "epoch": 70.0, + "grad_norm": 1.9640426635742188, + "learning_rate": 1.5e-05, + "loss": 0.0155, + "step": 7420 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.8421052631578948, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7710843373493976, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7086614173228347, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6521739130434783, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8797468354430381, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8475609756097561, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.22830812633037567, + "eval_overall_accuracy": 0.9592829705505762, + "eval_overall_f1": 0.83008356545961, + "eval_overall_precision": 0.7842105263157895, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.8737, + "eval_samples_per_second": 214.022, + "eval_steps_per_second": 3.434, + "step": 7420 + }, + { + "epoch": 71.0, + "grad_norm": 0.16448061168193817, + "learning_rate": 1.45e-05, + "loss": 0.0142, + "step": 7526 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.8513513513513514, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7974683544303798, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.6911764705882352, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6025641025641025, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8782051282051281, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.85625, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.2332638055086136, + "eval_overall_accuracy": 0.9590268886043534, + "eval_overall_f1": 0.83008356545961, + "eval_overall_precision": 0.7842105263157895, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.8749, + "eval_samples_per_second": 213.733, + "eval_steps_per_second": 3.429, + "step": 7526 + }, + { + "epoch": 72.0, + "grad_norm": 0.25591084361076355, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0165, + "step": 7632 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7401574803149606, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6811594202898551, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8745980707395499, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8553459119496856, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.8253968253968254, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7878787878787878, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.2185453474521637, + "eval_overall_accuracy": 0.9620998719590269, + "eval_overall_f1": 0.8429985855728428, + "eval_overall_precision": 0.8075880758807588, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.8809, + "eval_samples_per_second": 212.292, + "eval_steps_per_second": 3.406, + "step": 7632 + }, + { + "epoch": 73.0, + "grad_norm": 0.047924935817718506, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0145, + "step": 7738 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.8378378378378378, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7848101265822784, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7258064516129032, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6818181818181818, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8774193548387097, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8607594936708861, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.8253968253968254, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7878787878787878, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.22260642051696777, + "eval_overall_accuracy": 0.9605633802816902, + "eval_overall_f1": 0.8352272727272727, + "eval_overall_precision": 0.8032786885245902, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.8798, + "eval_samples_per_second": 212.544, + "eval_steps_per_second": 3.41, + "step": 7738 + }, + { + "epoch": 74.0, + "grad_norm": 0.29497218132019043, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0135, + "step": 7844 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.688, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6417910447761194, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8616352201257862, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8524590163934426, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.2320089489221573, + "eval_overall_accuracy": 0.9595390524967989, + "eval_overall_f1": 0.8316831683168318, + "eval_overall_precision": 0.7967479674796748, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.8782, + "eval_samples_per_second": 212.931, + "eval_steps_per_second": 3.416, + "step": 7844 + }, + { + "epoch": 75.0, + "grad_norm": 0.01762392371892929, + "learning_rate": 1.25e-05, + "loss": 0.0138, + "step": 7950 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.8456375838926175, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7286821705426356, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6619718309859155, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8789808917197451, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.2413773536682129, + "eval_overall_accuracy": 0.9585147247119078, + "eval_overall_f1": 0.8324022346368716, + "eval_overall_precision": 0.7883597883597884, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.883, + "eval_samples_per_second": 211.783, + "eval_steps_per_second": 3.398, + "step": 7950 + }, + { + "epoch": 76.0, + "grad_norm": 0.21732616424560547, + "learning_rate": 1.2e-05, + "loss": 0.0129, + "step": 8056 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7213114754098361, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.8387096774193549, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.8125, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.22952446341514587, + "eval_overall_accuracy": 0.9615877080665813, + "eval_overall_f1": 0.8473609129814551, + "eval_overall_precision": 0.8181818181818182, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.8794, + "eval_samples_per_second": 212.653, + "eval_steps_per_second": 3.412, + "step": 8056 + }, + { + "epoch": 77.0, + "grad_norm": 0.42748743295669556, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0118, + "step": 8162 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.8322147651006713, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.775, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7301587301587301, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6764705882352942, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8734177215189874, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8414634146341463, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.23922854661941528, + "eval_overall_accuracy": 0.9595390524967989, + "eval_overall_f1": 0.8319327731092437, + "eval_overall_precision": 0.7898936170212766, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.8708, + "eval_samples_per_second": 214.745, + "eval_steps_per_second": 3.445, + "step": 8162 + }, + { + "epoch": 78.0, + "grad_norm": 0.6394297480583191, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0127, + "step": 8268 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.8689655172413793, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8289473684210527, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.736, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6865671641791045, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8616352201257862, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.2345399558544159, + "eval_overall_accuracy": 0.9615877080665813, + "eval_overall_f1": 0.8433048433048432, + "eval_overall_precision": 0.8131868131868132, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.8826, + "eval_samples_per_second": 211.871, + "eval_steps_per_second": 3.399, + "step": 8268 + }, + { + "epoch": 79.0, + "grad_norm": 0.5575834512710571, + "learning_rate": 1.05e-05, + "loss": 0.0122, + "step": 8374 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.8456375838926175, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.746031746031746, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6911764705882353, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8825396825396825, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.852760736196319, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.24232904613018036, + "eval_overall_accuracy": 0.9608194622279129, + "eval_overall_f1": 0.8410689170182841, + "eval_overall_precision": 0.8016085790884718, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.8823, + "eval_samples_per_second": 211.957, + "eval_steps_per_second": 3.4, + "step": 8374 + }, + { + "epoch": 80.0, + "grad_norm": 0.14352549612522125, + "learning_rate": 1e-05, + "loss": 0.0117, + "step": 8480 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.881118881118881, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8513513513513513, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7317073170731708, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6923076923076923, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8782051282051281, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.85625, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8275862068965517, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.22932307422161102, + "eval_overall_accuracy": 0.9623559539052496, + "eval_overall_f1": 0.8412017167381973, + "eval_overall_precision": 0.814404432132964, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.882, + "eval_samples_per_second": 212.026, + "eval_steps_per_second": 3.401, + "step": 8480 + }, + { + "epoch": 81.0, + "grad_norm": 0.47307726740837097, + "learning_rate": 9.5e-06, + "loss": 0.012, + "step": 8586 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.8456375838926175, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.71875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8881789137380192, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.761904761904762, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7272727272727273, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.23925283551216125, + "eval_overall_accuracy": 0.9600512163892445, + "eval_overall_f1": 0.8370786516853933, + "eval_overall_precision": 0.7967914438502673, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.8824, + "eval_samples_per_second": 211.922, + "eval_steps_per_second": 3.4, + "step": 8586 + }, + { + "epoch": 82.0, + "grad_norm": 1.2593061923980713, + "learning_rate": 9e-06, + "loss": 0.013, + "step": 8692 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.8456375838926175, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7333333333333333, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7096774193548387, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.23163729906082153, + "eval_overall_accuracy": 0.9608194622279129, + "eval_overall_f1": 0.8404558404558404, + "eval_overall_precision": 0.8104395604395604, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.8706, + "eval_samples_per_second": 214.791, + "eval_steps_per_second": 3.446, + "step": 8692 + }, + { + "epoch": 83.0, + "grad_norm": 0.8594510555267334, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0115, + "step": 8798 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.8435374149659864, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7096774193548386, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8874598070739551, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8679245283018868, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.23205186426639557, + "eval_overall_accuracy": 0.9605633802816902, + "eval_overall_f1": 0.8380681818181818, + "eval_overall_precision": 0.8060109289617486, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.8704, + "eval_samples_per_second": 214.832, + "eval_steps_per_second": 3.447, + "step": 8798 + }, + { + "epoch": 84.0, + "grad_norm": 0.06097917631268501, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0124, + "step": 8904 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7107438016528926, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6825396825396826, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.891025641025641, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.23106266558170319, + "eval_overall_accuracy": 0.9615877080665813, + "eval_overall_f1": 0.8433048433048432, + "eval_overall_precision": 0.8131868131868132, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.8817, + "eval_samples_per_second": 212.094, + "eval_steps_per_second": 3.403, + "step": 8904 + }, + { + "epoch": 85.0, + "grad_norm": 0.0979015976190567, + "learning_rate": 7.5e-06, + "loss": 0.0118, + "step": 9010 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.8456375838926175, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7096774193548386, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8616352201257862, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.24057278037071228, + "eval_overall_accuracy": 0.9603072983354674, + "eval_overall_f1": 0.8328611898016997, + "eval_overall_precision": 0.7989130434782609, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.9001, + "eval_samples_per_second": 207.748, + "eval_steps_per_second": 3.333, + "step": 9010 + }, + { + "epoch": 86.0, + "grad_norm": 2.0524749755859375, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0115, + "step": 9116 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.8456375838926175, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7317073170731708, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6923076923076923, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8817891373801917, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8571428571428571, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.23790757358074188, + "eval_overall_accuracy": 0.9610755441741358, + "eval_overall_f1": 0.8373408769448374, + "eval_overall_precision": 0.8021680216802168, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.8768, + "eval_samples_per_second": 213.284, + "eval_steps_per_second": 3.422, + "step": 9116 + }, + { + "epoch": 87.0, + "grad_norm": 0.27762410044670105, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0116, + "step": 9222 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.8513513513513514, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7974683544303798, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7419354838709677, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.696969696969697, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.8064516129032259, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.78125, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.23644272983074188, + "eval_overall_accuracy": 0.9623559539052496, + "eval_overall_f1": 0.8453900709219858, + "eval_overall_precision": 0.8119891008174387, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.8829, + "eval_samples_per_second": 211.807, + "eval_steps_per_second": 3.398, + "step": 9222 + }, + { + "epoch": 88.0, + "grad_norm": 0.8406667709350586, + "learning_rate": 6e-06, + "loss": 0.0107, + "step": 9328 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.8551724137931034, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8157894736842105, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7272727272727273, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6984126984126984, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8810289389067525, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8616352201257862, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.2356145679950714, + "eval_overall_accuracy": 0.9618437900128041, + "eval_overall_f1": 0.8412017167381973, + "eval_overall_precision": 0.814404432132964, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.8796, + "eval_samples_per_second": 212.593, + "eval_steps_per_second": 3.411, + "step": 9328 + }, + { + "epoch": 89.0, + "grad_norm": 0.5086352825164795, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0099, + "step": 9434 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.8456375838926175, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7258064516129032, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6818181818181818, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8874598070739551, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8679245283018868, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.2441495656967163, + "eval_overall_accuracy": 0.9610755441741358, + "eval_overall_f1": 0.8413597733711048, + "eval_overall_precision": 0.8070652173913043, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.8816, + "eval_samples_per_second": 212.126, + "eval_steps_per_second": 3.403, + "step": 9434 + }, + { + "epoch": 90.0, + "grad_norm": 0.11222498118877411, + "learning_rate": 5e-06, + "loss": 0.0116, + "step": 9540 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7301587301587301, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6764705882352942, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.8064516129032259, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.78125, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.896551724137931, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.896551724137931, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.24032947421073914, + "eval_overall_accuracy": 0.9618437900128041, + "eval_overall_f1": 0.8470254957507082, + "eval_overall_precision": 0.8125, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.8666, + "eval_samples_per_second": 215.798, + "eval_steps_per_second": 3.462, + "step": 9540 + }, + { + "epoch": 91.0, + "grad_norm": 1.0016977787017822, + "learning_rate": 4.5e-06, + "loss": 0.0101, + "step": 9646 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.736, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6865671641791045, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.891025641025641, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.24104949831962585, + "eval_overall_accuracy": 0.9623559539052496, + "eval_overall_f1": 0.84822695035461, + "eval_overall_precision": 0.8147138964577657, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.8925, + "eval_samples_per_second": 209.524, + "eval_steps_per_second": 3.361, + "step": 9646 + }, + { + "epoch": 92.0, + "grad_norm": 0.5980203151702881, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0104, + "step": 9752 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.8378378378378378, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7848101265822784, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7377049180327868, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.703125, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.891025641025641, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.2380075305700302, + "eval_overall_accuracy": 0.9615877080665813, + "eval_overall_f1": 0.8421052631578948, + "eval_overall_precision": 0.810958904109589, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.9541, + "eval_samples_per_second": 195.995, + "eval_steps_per_second": 3.144, + "step": 9752 + }, + { + "epoch": 93.0, + "grad_norm": 0.1031588539481163, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0113, + "step": 9858 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.8551724137931034, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8157894736842105, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7301587301587301, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6764705882352942, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.23934581875801086, + "eval_overall_accuracy": 0.9613316261203585, + "eval_overall_f1": 0.8409090909090909, + "eval_overall_precision": 0.8087431693989071, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.9053, + "eval_samples_per_second": 206.559, + "eval_steps_per_second": 3.314, + "step": 9858 + }, + { + "epoch": 94.0, + "grad_norm": 1.1558283567428589, + "learning_rate": 3e-06, + "loss": 0.0104, + "step": 9964 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7258064516129032, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6818181818181818, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.2401016503572464, + "eval_overall_accuracy": 0.9620998719590269, + "eval_overall_f1": 0.8421052631578948, + "eval_overall_precision": 0.810958904109589, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.8815, + "eval_samples_per_second": 212.144, + "eval_steps_per_second": 3.403, + "step": 9964 + }, + { + "epoch": 95.0, + "grad_norm": 1.1983808279037476, + "learning_rate": 2.5e-06, + "loss": 0.0108, + "step": 10070 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.8493150684931507, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8051948051948052, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.746031746031746, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6911764705882353, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.2409784197807312, + "eval_overall_accuracy": 0.9618437900128041, + "eval_overall_f1": 0.8425531914893617, + "eval_overall_precision": 0.8092643051771117, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.8891, + "eval_samples_per_second": 210.332, + "eval_steps_per_second": 3.374, + "step": 10070 + }, + { + "epoch": 96.0, + "grad_norm": 1.2160520553588867, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0102, + "step": 10176 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.8513513513513514, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7974683544303798, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7200000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6716417910447762, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8853503184713376, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8580246913580247, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.2440604716539383, + "eval_overall_accuracy": 0.9613316261203585, + "eval_overall_f1": 0.840620592383639, + "eval_overall_precision": 0.8032345013477089, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.9399, + "eval_samples_per_second": 198.966, + "eval_steps_per_second": 3.192, + "step": 10176 + }, + { + "epoch": 97.0, + "grad_norm": 1.8695564270019531, + "learning_rate": 1.5e-06, + "loss": 0.0099, + "step": 10282 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7258064516129032, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6818181818181818, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.891025641025641, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.24207578599452972, + "eval_overall_accuracy": 0.9615877080665813, + "eval_overall_f1": 0.8453900709219858, + "eval_overall_precision": 0.8119891008174387, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.8801, + "eval_samples_per_second": 212.469, + "eval_steps_per_second": 3.409, + "step": 10282 + }, + { + "epoch": 98.0, + "grad_norm": 0.15321999788284302, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.009, + "step": 10388 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7301587301587301, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6764705882352942, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.891025641025641, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.24271713197231293, + "eval_overall_accuracy": 0.9620998719590269, + "eval_overall_f1": 0.8453900709219858, + "eval_overall_precision": 0.8119891008174387, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.8771, + "eval_samples_per_second": 213.205, + "eval_steps_per_second": 3.42, + "step": 10388 + }, + { + "epoch": 99.0, + "grad_norm": 0.49585482478141785, + "learning_rate": 5.000000000000001e-07, + "loss": 0.0096, + "step": 10494 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.8513513513513514, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7974683544303798, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7096774193548386, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.891025641025641, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.86875, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.24335408210754395, + "eval_overall_accuracy": 0.9613316261203585, + "eval_overall_f1": 0.8413597733711048, + "eval_overall_precision": 0.8070652173913043, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.8907, + "eval_samples_per_second": 209.939, + "eval_steps_per_second": 3.368, + "step": 10494 + }, + { + "epoch": 100.0, + "grad_norm": 0.202548086643219, + "learning_rate": 0.0, + "loss": 0.0099, + "step": 10600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.8513513513513514, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7974683544303798, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7096774193548386, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8846153846153847, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8625, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.24319376051425934, + "eval_overall_accuracy": 0.9610755441741358, + "eval_overall_f1": 0.8385269121813032, + "eval_overall_precision": 0.8043478260869565, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.8912, + "eval_samples_per_second": 209.818, + "eval_steps_per_second": 3.366, + "step": 10600 + }, + { + "epoch": 100.0, + "step": 10600, + "total_flos": 5072850468719490.0, + "train_loss": 0.05464491794694145, + "train_runtime": 1550.8875, + "train_samples_per_second": 108.776, + "train_steps_per_second": 6.835 + } + ], + "logging_steps": 500, + "max_steps": 10600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 5072850468719490.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}