diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3930 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 10600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 1.1115612983703613, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.9255, + "step": 106 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.012820512820512818, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.25, + "eval_PERSON_recall": 0.006578947368421052, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.0, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.0, + "eval_TIME_recall": 0.0, + "eval_loss": 0.6139789819717407, + "eval_overall_accuracy": 0.8402048655569783, + "eval_overall_f1": 0.005847953216374269, + "eval_overall_precision": 0.25, + "eval_overall_recall": 0.0029585798816568047, + "eval_runtime": 0.288, + "eval_samples_per_second": 649.255, + "eval_steps_per_second": 10.416, + "step": 106 + }, + { + "epoch": 2.0, + "grad_norm": 1.1083379983901978, + "learning_rate": 4.9e-05, + "loss": 0.5503, + "step": 212 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.09523809523809525, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.26666666666666666, + "eval_LOCATION_recall": 0.057971014492753624, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.5272727272727272, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.4887640449438202, + "eval_PERSON_recall": 0.5723684210526315, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.4, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.5625, + "eval_TIME_recall": 0.3103448275862069, + "eval_loss": 0.3994744122028351, + "eval_overall_accuracy": 0.8855313700384123, + "eval_overall_f1": 0.35714285714285715, + "eval_overall_precision": 0.45045045045045046, + "eval_overall_recall": 0.2958579881656805, + "eval_runtime": 0.2908, + "eval_samples_per_second": 642.974, + "eval_steps_per_second": 10.315, + "step": 212 + }, + { + "epoch": 3.0, + "grad_norm": 1.8157479763031006, + "learning_rate": 4.85e-05, + "loss": 0.3808, + "step": 318 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.5454545454545455, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.49411764705882355, + "eval_LOCATION_recall": 0.6086956521739131, + "eval_ORGANIZATION_f1": 0.2978723404255319, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.3888888888888889, + "eval_ORGANIZATION_recall": 0.2413793103448276, + "eval_PERSON_f1": 0.735632183908046, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.6530612244897959, + "eval_PERSON_recall": 0.8421052631578947, + "eval_QUANTITY_f1": 0.28571428571428564, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.2727272727272727, + "eval_QUANTITY_recall": 0.3, + "eval_TIME_f1": 0.6785714285714286, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7037037037037037, + "eval_TIME_recall": 0.6551724137931034, + "eval_loss": 0.28895071148872375, + "eval_overall_accuracy": 0.9267605633802817, + "eval_overall_f1": 0.5930069930069929, + "eval_overall_precision": 0.5623342175066313, + "eval_overall_recall": 0.6272189349112426, + "eval_runtime": 0.2887, + "eval_samples_per_second": 647.772, + "eval_steps_per_second": 10.392, + "step": 318 + }, + { + "epoch": 4.0, + "grad_norm": 1.4652637243270874, + "learning_rate": 4.8e-05, + "loss": 0.2833, + "step": 424 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.6455696202531644, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.5730337078651685, + "eval_LOCATION_recall": 0.7391304347826086, + "eval_ORGANIZATION_f1": 0.4642857142857143, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.48148148148148145, + "eval_ORGANIZATION_recall": 0.4482758620689655, + "eval_PERSON_f1": 0.7904191616766467, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.7252747252747253, + "eval_PERSON_recall": 0.868421052631579, + "eval_QUANTITY_f1": 0.47058823529411764, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.42105263157894735, + "eval_QUANTITY_recall": 0.5333333333333333, + "eval_TIME_f1": 0.8275862068965517, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.21562312543392181, + "eval_overall_accuracy": 0.9405889884763124, + "eval_overall_f1": 0.6821917808219178, + "eval_overall_precision": 0.6352040816326531, + "eval_overall_recall": 0.7366863905325444, + "eval_runtime": 0.2884, + "eval_samples_per_second": 648.345, + "eval_steps_per_second": 10.401, + "step": 424 + }, + { + "epoch": 5.0, + "grad_norm": 1.5371237993240356, + "learning_rate": 4.75e-05, + "loss": 0.2213, + "step": 530 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.725, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6373626373626373, + "eval_LOCATION_recall": 0.8405797101449275, + "eval_ORGANIZATION_f1": 0.5263157894736843, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5357142857142857, + "eval_ORGANIZATION_recall": 0.5172413793103449, + "eval_PERSON_f1": 0.8012048192771084, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.7388888888888889, + "eval_PERSON_recall": 0.875, + "eval_QUANTITY_f1": 0.5333333333333333, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.4444444444444444, + "eval_QUANTITY_recall": 0.6666666666666666, + "eval_TIME_f1": 0.8275862068965517, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.18281985819339752, + "eval_overall_accuracy": 0.9434058898847631, + "eval_overall_f1": 0.7171853856562923, + "eval_overall_precision": 0.6608478802992519, + "eval_overall_recall": 0.7840236686390533, + "eval_runtime": 0.2945, + "eval_samples_per_second": 634.984, + "eval_steps_per_second": 10.187, + "step": 530 + }, + { + "epoch": 6.0, + "grad_norm": 1.388359546661377, + "learning_rate": 4.7e-05, + "loss": 0.1845, + "step": 636 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.7721518987341773, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.6853932584269663, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.5757575757575758, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5135135135135135, + "eval_ORGANIZATION_recall": 0.6551724137931034, + "eval_PERSON_f1": 0.8192771084337349, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.7555555555555555, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.5714285714285715, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.5, + "eval_QUANTITY_recall": 0.6666666666666666, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1697721779346466, + "eval_overall_accuracy": 0.9467349551856594, + "eval_overall_f1": 0.7446808510638298, + "eval_overall_precision": 0.6763285024154589, + "eval_overall_recall": 0.8284023668639053, + "eval_runtime": 0.2919, + "eval_samples_per_second": 640.647, + "eval_steps_per_second": 10.278, + "step": 636 + }, + { + "epoch": 7.0, + "grad_norm": 0.574262261390686, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.17, + "step": 742 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.8026315789473685, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7349397590361446, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6016260162601627, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5692307692307692, + "eval_ORGANIZATION_recall": 0.6379310344827587, + "eval_PERSON_f1": 0.8246153846153845, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.7745664739884393, + "eval_PERSON_recall": 0.881578947368421, + "eval_QUANTITY_f1": 0.7164179104477612, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6486486486486487, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1523420363664627, + "eval_overall_accuracy": 0.9510883482714468, + "eval_overall_f1": 0.7741046831955923, + "eval_overall_precision": 0.7242268041237113, + "eval_overall_recall": 0.8313609467455622, + "eval_runtime": 0.292, + "eval_samples_per_second": 640.421, + "eval_steps_per_second": 10.274, + "step": 742 + }, + { + "epoch": 8.0, + "grad_norm": 0.5678428411483765, + "learning_rate": 4.600000000000001e-05, + "loss": 0.1552, + "step": 848 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.8266666666666668, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7654320987654321, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.5891472868217055, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5352112676056338, + "eval_ORGANIZATION_recall": 0.6551724137931034, + "eval_PERSON_f1": 0.8307692307692307, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.7803468208092486, + "eval_PERSON_recall": 0.8881578947368421, + "eval_QUANTITY_f1": 0.7272727272727272, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8064516129032258, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7575757575757576, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.14812210202217102, + "eval_overall_accuracy": 0.9518565941101153, + "eval_overall_f1": 0.7759562841530055, + "eval_overall_precision": 0.7208121827411168, + "eval_overall_recall": 0.8402366863905325, + "eval_runtime": 0.2864, + "eval_samples_per_second": 652.864, + "eval_steps_per_second": 10.474, + "step": 848 + }, + { + "epoch": 9.0, + "grad_norm": 1.5933927297592163, + "learning_rate": 4.55e-05, + "loss": 0.1486, + "step": 954 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.8299319727891156, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.782051282051282, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.5853658536585366, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5538461538461539, + "eval_ORGANIZATION_recall": 0.6206896551724138, + "eval_PERSON_f1": 0.8427672955974843, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8072289156626506, + "eval_PERSON_recall": 0.881578947368421, + "eval_QUANTITY_f1": 0.7575757575757577, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1388072818517685, + "eval_overall_accuracy": 0.9544174135723431, + "eval_overall_f1": 0.7860139860139861, + "eval_overall_precision": 0.7453580901856764, + "eval_overall_recall": 0.8313609467455622, + "eval_runtime": 0.2922, + "eval_samples_per_second": 640.03, + "eval_steps_per_second": 10.268, + "step": 954 + }, + { + "epoch": 10.0, + "grad_norm": 1.51088547706604, + "learning_rate": 4.5e-05, + "loss": 0.1369, + "step": 1060 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.8243243243243245, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7721518987341772, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6141732283464567, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5652173913043478, + "eval_ORGANIZATION_recall": 0.6724137931034483, + "eval_PERSON_f1": 0.8348909657320872, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.7928994082840237, + "eval_PERSON_recall": 0.881578947368421, + "eval_QUANTITY_f1": 0.6956521739130435, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6153846153846154, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8253968253968255, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.13908080756664276, + "eval_overall_accuracy": 0.9549295774647887, + "eval_overall_f1": 0.7802197802197802, + "eval_overall_precision": 0.7282051282051282, + "eval_overall_recall": 0.8402366863905325, + "eval_runtime": 0.2918, + "eval_samples_per_second": 640.865, + "eval_steps_per_second": 10.281, + "step": 1060 + }, + { + "epoch": 11.0, + "grad_norm": 1.4912283420562744, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.132, + "step": 1166 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.8456375838926175, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.6299212598425198, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5797101449275363, + "eval_ORGANIZATION_recall": 0.6896551724137931, + "eval_PERSON_f1": 0.8473520249221184, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8047337278106509, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7812500000000001, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7142857142857143, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.14170250296592712, + "eval_overall_accuracy": 0.953393085787452, + "eval_overall_f1": 0.7928669410150891, + "eval_overall_precision": 0.7391304347826086, + "eval_overall_recall": 0.8550295857988166, + "eval_runtime": 0.2921, + "eval_samples_per_second": 640.286, + "eval_steps_per_second": 10.272, + "step": 1166 + }, + { + "epoch": 12.0, + "grad_norm": 2.726284980773926, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.1268, + "step": 1272 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.8378378378378378, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7848101265822784, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.6140350877192983, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.603448275862069, + "eval_PERSON_f1": 0.8517350157728707, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.8881578947368421, + "eval_QUANTITY_f1": 0.7878787878787877, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7222222222222222, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.13159815967082977, + "eval_overall_accuracy": 0.9577464788732394, + "eval_overall_f1": 0.8016997167138811, + "eval_overall_precision": 0.7690217391304348, + "eval_overall_recall": 0.8372781065088757, + "eval_runtime": 0.2918, + "eval_samples_per_second": 640.768, + "eval_steps_per_second": 10.28, + "step": 1272 + }, + { + "epoch": 13.0, + "grad_norm": 0.7623488903045654, + "learning_rate": 4.35e-05, + "loss": 0.1206, + "step": 1378 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.6115702479338844, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5873015873015873, + "eval_ORGANIZATION_recall": 0.6379310344827587, + "eval_PERSON_f1": 0.8526645768025078, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8143712574850299, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.13245752453804016, + "eval_overall_accuracy": 0.9574903969270167, + "eval_overall_f1": 0.8011204481792716, + "eval_overall_precision": 0.7606382978723404, + "eval_overall_recall": 0.8461538461538461, + "eval_runtime": 0.2913, + "eval_samples_per_second": 641.931, + "eval_steps_per_second": 10.298, + "step": 1378 + }, + { + "epoch": 14.0, + "grad_norm": 3.348172664642334, + "learning_rate": 4.3e-05, + "loss": 0.1186, + "step": 1484 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.8551724137931034, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8157894736842105, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7299270072992702, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6329113924050633, + "eval_ORGANIZATION_recall": 0.8620689655172413, + "eval_PERSON_f1": 0.8500000000000001, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8095238095238095, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8253968253968255, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.13963258266448975, + "eval_overall_accuracy": 0.9577464788732394, + "eval_overall_f1": 0.8158253751705321, + "eval_overall_precision": 0.7569620253164557, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2864, + "eval_samples_per_second": 652.876, + "eval_steps_per_second": 10.474, + "step": 1484 + }, + { + "epoch": 15.0, + "grad_norm": 1.2007865905761719, + "learning_rate": 4.25e-05, + "loss": 0.1141, + "step": 1590 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.8611111111111112, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8266666666666667, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.6333333333333333, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6129032258064516, + "eval_ORGANIZATION_recall": 0.6551724137931034, + "eval_PERSON_f1": 0.8481012658227848, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8170731707317073, + "eval_PERSON_recall": 0.881578947368421, + "eval_QUANTITY_f1": 0.7575757575757577, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8064516129032258, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7575757575757576, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1281883716583252, + "eval_overall_accuracy": 0.9582586427656851, + "eval_overall_f1": 0.8022598870056498, + "eval_overall_precision": 0.7675675675675676, + "eval_overall_recall": 0.8402366863905325, + "eval_runtime": 0.2866, + "eval_samples_per_second": 652.52, + "eval_steps_per_second": 10.468, + "step": 1590 + }, + { + "epoch": 16.0, + "grad_norm": 0.8597108721733093, + "learning_rate": 4.2e-05, + "loss": 0.1083, + "step": 1696 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7131782945736435, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.647887323943662, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8526645768025078, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8143712574850299, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7272727272727272, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8253968253968255, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1355670541524887, + "eval_overall_accuracy": 0.9577464788732394, + "eval_overall_f1": 0.8149171270718233, + "eval_overall_precision": 0.7642487046632125, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.2918, + "eval_samples_per_second": 640.866, + "eval_steps_per_second": 10.281, + "step": 1696 + }, + { + "epoch": 17.0, + "grad_norm": 0.9207751154899597, + "learning_rate": 4.15e-05, + "loss": 0.1106, + "step": 1802 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.6612903225806452, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6212121212121212, + "eval_ORGANIZATION_recall": 0.7068965517241379, + "eval_PERSON_f1": 0.8517350157728707, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.8881578947368421, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8064516129032258, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7575757575757576, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1327420026063919, + "eval_overall_accuracy": 0.9585147247119078, + "eval_overall_f1": 0.8089260808926082, + "eval_overall_precision": 0.7651715039577837, + "eval_overall_recall": 0.8579881656804734, + "eval_runtime": 0.2911, + "eval_samples_per_second": 642.326, + "eval_steps_per_second": 10.305, + "step": 1802 + }, + { + "epoch": 18.0, + "grad_norm": 2.323946952819824, + "learning_rate": 4.1e-05, + "loss": 0.1053, + "step": 1908 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.6611570247933884, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6349206349206349, + "eval_ORGANIZATION_recall": 0.6896551724137931, + "eval_PERSON_f1": 0.8500000000000001, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8095238095238095, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7246376811594204, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6410256410256411, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7936507936507937, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7352941176470589, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1359253227710724, + "eval_overall_accuracy": 0.9580025608194622, + "eval_overall_f1": 0.8044382801664354, + "eval_overall_precision": 0.7571801566579635, + "eval_overall_recall": 0.8579881656804734, + "eval_runtime": 0.2865, + "eval_samples_per_second": 652.807, + "eval_steps_per_second": 10.473, + "step": 1908 + }, + { + "epoch": 19.0, + "grad_norm": 24.915828704833984, + "learning_rate": 4.05e-05, + "loss": 0.1089, + "step": 2014 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.8450704225352113, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.821917808219178, + "eval_LOCATION_recall": 0.8695652173913043, + "eval_ORGANIZATION_f1": 0.584070796460177, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6, + "eval_ORGANIZATION_recall": 0.5689655172413793, + "eval_PERSON_f1": 0.8471337579617835, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8209876543209876, + "eval_PERSON_recall": 0.875, + "eval_QUANTITY_f1": 0.8, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7428571428571429, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.12435611337423325, + "eval_overall_accuracy": 0.9595390524967989, + "eval_overall_f1": 0.8023088023088024, + "eval_overall_precision": 0.7830985915492957, + "eval_overall_recall": 0.8224852071005917, + "eval_runtime": 0.2922, + "eval_samples_per_second": 640.081, + "eval_steps_per_second": 10.269, + "step": 2014 + }, + { + "epoch": 20.0, + "grad_norm": 0.8912884593009949, + "learning_rate": 4e-05, + "loss": 0.1046, + "step": 2120 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.863013698630137, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.6779661016949153, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.6896551724137931, + "eval_PERSON_f1": 0.85625, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8154761904761905, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7575757575757577, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7741935483870968, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7272727272727273, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.12911909818649292, + "eval_overall_accuracy": 0.9600512163892445, + "eval_overall_f1": 0.8117977528089887, + "eval_overall_precision": 0.7727272727272727, + "eval_overall_recall": 0.8550295857988166, + "eval_runtime": 0.2932, + "eval_samples_per_second": 637.792, + "eval_steps_per_second": 10.232, + "step": 2120 + }, + { + "epoch": 21.0, + "grad_norm": 0.48920294642448425, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.0965, + "step": 2226 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.84, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7777777777777778, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7424242424242424, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6621621621621622, + "eval_ORGANIZATION_recall": 0.8448275862068966, + "eval_PERSON_f1": 0.875, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8333333333333334, + "eval_PERSON_recall": 0.9210526315789473, + "eval_QUANTITY_f1": 0.7575757575757577, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7741935483870968, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7272727272727273, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1389300525188446, + "eval_overall_accuracy": 0.9572343149807938, + "eval_overall_f1": 0.8246575342465754, + "eval_overall_precision": 0.7678571428571429, + "eval_overall_recall": 0.8905325443786982, + "eval_runtime": 0.2936, + "eval_samples_per_second": 637.017, + "eval_steps_per_second": 10.22, + "step": 2226 + }, + { + "epoch": 22.0, + "grad_norm": 1.2225427627563477, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.0958, + "step": 2332 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7633587786259542, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.684931506849315, + "eval_ORGANIZATION_recall": 0.8620689655172413, + "eval_PERSON_f1": 0.8544303797468354, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.823170731707317, + "eval_PERSON_recall": 0.8881578947368421, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1278899759054184, + "eval_overall_accuracy": 0.9618437900128041, + "eval_overall_f1": 0.8328690807799444, + "eval_overall_precision": 0.7868421052631579, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2942, + "eval_samples_per_second": 635.574, + "eval_steps_per_second": 10.196, + "step": 2332 + }, + { + "epoch": 23.0, + "grad_norm": 0.5433982014656067, + "learning_rate": 3.85e-05, + "loss": 0.0929, + "step": 2438 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.8591549295774648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8356164383561644, + "eval_LOCATION_recall": 0.8840579710144928, + "eval_ORGANIZATION_f1": 0.6499999999999999, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6290322580645161, + "eval_ORGANIZATION_recall": 0.6724137931034483, + "eval_PERSON_f1": 0.8616352201257862, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8253012048192772, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.12278664112091064, + "eval_overall_accuracy": 0.9628681177976952, + "eval_overall_f1": 0.8176638176638177, + "eval_overall_precision": 0.7884615384615384, + "eval_overall_recall": 0.849112426035503, + "eval_runtime": 0.2929, + "eval_samples_per_second": 638.552, + "eval_steps_per_second": 10.244, + "step": 2438 + }, + { + "epoch": 24.0, + "grad_norm": 1.4073636531829834, + "learning_rate": 3.8e-05, + "loss": 0.0934, + "step": 2544 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.746268656716418, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6578947368421053, + "eval_ORGANIZATION_recall": 0.8620689655172413, + "eval_PERSON_f1": 0.8509316770186336, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8058823529411765, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.8125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7647058823529411, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8196721311475409, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.13569365441799164, + "eval_overall_accuracy": 0.9595390524967989, + "eval_overall_f1": 0.8285322359396433, + "eval_overall_precision": 0.7723785166240409, + "eval_overall_recall": 0.893491124260355, + "eval_runtime": 0.2923, + "eval_samples_per_second": 639.683, + "eval_steps_per_second": 10.262, + "step": 2544 + }, + { + "epoch": 25.0, + "grad_norm": 2.3010127544403076, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0887, + "step": 2650 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.8435374149659864, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7166666666666668, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6935483870967742, + "eval_ORGANIZATION_recall": 0.7413793103448276, + "eval_PERSON_f1": 0.8535031847133758, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8271604938271605, + "eval_PERSON_recall": 0.881578947368421, + "eval_QUANTITY_f1": 0.8125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7647058823529411, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.12316982448101044, + "eval_overall_accuracy": 0.9615877080665813, + "eval_overall_f1": 0.8198581560283689, + "eval_overall_precision": 0.7874659400544959, + "eval_overall_recall": 0.8550295857988166, + "eval_runtime": 0.2928, + "eval_samples_per_second": 638.601, + "eval_steps_per_second": 10.245, + "step": 2650 + }, + { + "epoch": 26.0, + "grad_norm": 1.5638922452926636, + "learning_rate": 3.7e-05, + "loss": 0.1025, + "step": 2756 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7716535433070866, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7101449275362319, + "eval_ORGANIZATION_recall": 0.8448275862068966, + "eval_PERSON_f1": 0.8616352201257862, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8253012048192772, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1349707692861557, + "eval_overall_accuracy": 0.9618437900128041, + "eval_overall_f1": 0.8312412831241283, + "eval_overall_precision": 0.7862796833773087, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.2929, + "eval_samples_per_second": 638.525, + "eval_steps_per_second": 10.244, + "step": 2756 + }, + { + "epoch": 27.0, + "grad_norm": 1.3527199029922485, + "learning_rate": 3.65e-05, + "loss": 0.0868, + "step": 2862 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7777777777777778, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7205882352941176, + "eval_ORGANIZATION_recall": 0.8448275862068966, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7761194029850748, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7027027027027027, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.7868852459016394, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1333594173192978, + "eval_overall_accuracy": 0.9613316261203585, + "eval_overall_f1": 0.8328690807799444, + "eval_overall_precision": 0.7868421052631579, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2923, + "eval_samples_per_second": 639.845, + "eval_steps_per_second": 10.265, + "step": 2862 + }, + { + "epoch": 28.0, + "grad_norm": 2.2402167320251465, + "learning_rate": 3.6e-05, + "loss": 0.0836, + "step": 2968 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7258064516129032, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6818181818181818, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8616352201257862, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8253012048192772, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7575757575757577, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7419354838709677, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.696969696969697, + "eval_TIME_recall": 0.7931034482758621, + "eval_loss": 0.1318032443523407, + "eval_overall_accuracy": 0.9620998719590269, + "eval_overall_f1": 0.8212290502793296, + "eval_overall_precision": 0.7777777777777778, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.2923, + "eval_samples_per_second": 639.756, + "eval_steps_per_second": 10.263, + "step": 2968 + }, + { + "epoch": 29.0, + "grad_norm": 1.1857041120529175, + "learning_rate": 3.55e-05, + "loss": 0.0791, + "step": 3074 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.8513513513513514, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7974683544303798, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.746031746031746, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6911764705882353, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.85625, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8154761904761905, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.13442517817020416, + "eval_overall_accuracy": 0.9618437900128041, + "eval_overall_f1": 0.8256624825662484, + "eval_overall_precision": 0.7810026385224275, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2926, + "eval_samples_per_second": 639.174, + "eval_steps_per_second": 10.254, + "step": 3074 + }, + { + "epoch": 30.0, + "grad_norm": 0.6588788032531738, + "learning_rate": 3.5e-05, + "loss": 0.0792, + "step": 3180 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7058823529411765, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6885245901639344, + "eval_ORGANIZATION_recall": 0.7241379310344828, + "eval_PERSON_f1": 0.8643533123028391, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.12328661233186722, + "eval_overall_accuracy": 0.9651728553137003, + "eval_overall_f1": 0.8312056737588652, + "eval_overall_precision": 0.7983651226158038, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.287, + "eval_samples_per_second": 651.586, + "eval_steps_per_second": 10.453, + "step": 3180 + }, + { + "epoch": 31.0, + "grad_norm": 0.4727123975753784, + "learning_rate": 3.45e-05, + "loss": 0.0772, + "step": 3286 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7317073170731708, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6923076923076923, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8652037617554859, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8263473053892215, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1277562379837036, + "eval_overall_accuracy": 0.9646606914212548, + "eval_overall_f1": 0.8326300984528833, + "eval_overall_precision": 0.7935656836461126, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2916, + "eval_samples_per_second": 641.192, + "eval_steps_per_second": 10.287, + "step": 3286 + }, + { + "epoch": 32.0, + "grad_norm": 0.8663144111633301, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.0766, + "step": 3392 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7244094488188977, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8544303797468354, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.823170731707317, + "eval_PERSON_recall": 0.8881578947368421, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.12748569250106812, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8258426966292135, + "eval_overall_precision": 0.786096256684492, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.287, + "eval_samples_per_second": 651.573, + "eval_steps_per_second": 10.453, + "step": 3392 + }, + { + "epoch": 33.0, + "grad_norm": 0.6088622808456421, + "learning_rate": 3.35e-05, + "loss": 0.0746, + "step": 3498 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7401574803149606, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6811594202898551, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8507936507936509, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8220858895705522, + "eval_PERSON_recall": 0.881578947368421, + "eval_QUANTITY_f1": 0.8253968253968254, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7878787878787878, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.13140520453453064, + "eval_overall_accuracy": 0.9636363636363636, + "eval_overall_f1": 0.8286516853932584, + "eval_overall_precision": 0.7887700534759359, + "eval_overall_recall": 0.8727810650887574, + "eval_runtime": 0.293, + "eval_samples_per_second": 638.168, + "eval_steps_per_second": 10.238, + "step": 3498 + }, + { + "epoch": 34.0, + "grad_norm": 1.8929836750030518, + "learning_rate": 3.3e-05, + "loss": 0.072, + "step": 3604 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7559055118110236, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6956521739130435, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8626198083067094, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8385093167701864, + "eval_PERSON_recall": 0.8881578947368421, + "eval_QUANTITY_f1": 0.8253968253968254, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7878787878787878, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.13353487849235535, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8366197183098592, + "eval_overall_precision": 0.7983870967741935, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.293, + "eval_samples_per_second": 638.242, + "eval_steps_per_second": 10.239, + "step": 3604 + }, + { + "epoch": 35.0, + "grad_norm": 1.197149634361267, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0712, + "step": 3710 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7377049180327868, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.703125, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8625, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8214285714285714, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.8, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7428571428571429, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.13137878477573395, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8347338935574229, + "eval_overall_precision": 0.7925531914893617, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.2864, + "eval_samples_per_second": 652.975, + "eval_steps_per_second": 10.476, + "step": 3710 + }, + { + "epoch": 36.0, + "grad_norm": 0.49242687225341797, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.0702, + "step": 3816 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.8611111111111112, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8266666666666667, + "eval_LOCATION_recall": 0.8985507246376812, + "eval_ORGANIZATION_f1": 0.7230769230769231, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6527777777777778, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8626198083067094, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8385093167701864, + "eval_PERSON_recall": 0.8881578947368421, + "eval_QUANTITY_f1": 0.8253968253968254, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7878787878787878, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8135593220338982, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.13032355904579163, + "eval_overall_accuracy": 0.9641485275288092, + "eval_overall_f1": 0.8293370944992947, + "eval_overall_precision": 0.7924528301886793, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.2939, + "eval_samples_per_second": 636.252, + "eval_steps_per_second": 10.207, + "step": 3816 + }, + { + "epoch": 37.0, + "grad_norm": 2.0146589279174805, + "learning_rate": 3.15e-05, + "loss": 0.0695, + "step": 3922 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.882758620689655, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8421052631578947, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7580645161290323, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7121212121212122, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8616352201257862, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8253012048192772, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.12965013086795807, + "eval_overall_accuracy": 0.9649167733674776, + "eval_overall_f1": 0.840620592383639, + "eval_overall_precision": 0.8032345013477089, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.2929, + "eval_samples_per_second": 638.49, + "eval_steps_per_second": 10.243, + "step": 3922 + }, + { + "epoch": 38.0, + "grad_norm": 0.8112408518791199, + "learning_rate": 3.1e-05, + "loss": 0.0674, + "step": 4028 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7419354838709677, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.696969696969697, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8706624605678233, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8363636363636363, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7536231884057971, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1362360268831253, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8363636363636363, + "eval_overall_precision": 0.7931034482758621, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2929, + "eval_samples_per_second": 638.407, + "eval_steps_per_second": 10.242, + "step": 4028 + }, + { + "epoch": 39.0, + "grad_norm": 0.4772998094558716, + "learning_rate": 3.05e-05, + "loss": 0.0653, + "step": 4134 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.6942148760330579, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7241379310344828, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.12953267991542816, + "eval_overall_accuracy": 0.964404609475032, + "eval_overall_f1": 0.8271954674220963, + "eval_overall_precision": 0.7934782608695652, + "eval_overall_recall": 0.863905325443787, + "eval_runtime": 0.2928, + "eval_samples_per_second": 638.701, + "eval_steps_per_second": 10.247, + "step": 4134 + }, + { + "epoch": 40.0, + "grad_norm": 1.1962645053863525, + "learning_rate": 3e-05, + "loss": 0.0637, + "step": 4240 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.8533333333333333, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7901234567901234, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7258064516129032, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6818181818181818, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8761904761904762, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8466257668711656, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.8125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7647058823529411, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.13492895662784576, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8330995792426368, + "eval_overall_precision": 0.792, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2867, + "eval_samples_per_second": 652.21, + "eval_steps_per_second": 10.463, + "step": 4240 + }, + { + "epoch": 41.0, + "grad_norm": 0.6804232597351074, + "learning_rate": 2.95e-05, + "loss": 0.0634, + "step": 4346 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.8749999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.84, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7619047619047619, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7058823529411765, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8742138364779874, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8373493975903614, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.1353294402360916, + "eval_overall_accuracy": 0.9651728553137003, + "eval_overall_f1": 0.8410689170182841, + "eval_overall_precision": 0.8016085790884718, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2941, + "eval_samples_per_second": 635.842, + "eval_steps_per_second": 10.201, + "step": 4346 + }, + { + "epoch": 42.0, + "grad_norm": 1.3958626985549927, + "learning_rate": 2.9e-05, + "loss": 0.0622, + "step": 4452 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7154471544715447, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.676923076923077, + "eval_ORGANIZATION_recall": 0.7586206896551724, + "eval_PERSON_f1": 0.8769716088328074, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8424242424242424, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.13502123951911926, + "eval_overall_accuracy": 0.9649167733674776, + "eval_overall_f1": 0.8377997179125528, + "eval_overall_precision": 0.8005390835579514, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2888, + "eval_samples_per_second": 647.481, + "eval_steps_per_second": 10.387, + "step": 4452 + }, + { + "epoch": 43.0, + "grad_norm": 1.7736326456069946, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.0616, + "step": 4558 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.882758620689655, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8421052631578947, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.752, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7014925373134329, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8742138364779874, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8373493975903614, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.8125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7647058823529411, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1341027021408081, + "eval_overall_accuracy": 0.9654289372599232, + "eval_overall_f1": 0.8455056179775281, + "eval_overall_precision": 0.8048128342245989, + "eval_overall_recall": 0.8905325443786982, + "eval_runtime": 0.2927, + "eval_samples_per_second": 638.914, + "eval_steps_per_second": 10.25, + "step": 4558 + }, + { + "epoch": 44.0, + "grad_norm": 4.0435028076171875, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.0603, + "step": 4664 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.8476821192052981, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.7804878048780488, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7286821705426356, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6619718309859155, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8714733542319749, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8323353293413174, + "eval_PERSON_recall": 0.9144736842105263, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.14106552302837372, + "eval_overall_accuracy": 0.9628681177976952, + "eval_overall_f1": 0.8310249307479224, + "eval_overall_precision": 0.78125, + "eval_overall_recall": 0.8875739644970414, + "eval_runtime": 0.2883, + "eval_samples_per_second": 648.632, + "eval_steps_per_second": 10.406, + "step": 4664 + }, + { + "epoch": 45.0, + "grad_norm": 1.2470722198486328, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0597, + "step": 4770 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7286821705426356, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6619718309859155, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8652037617554859, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8263473053892215, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7352941176470588, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6578947368421053, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.14016938209533691, + "eval_overall_accuracy": 0.9620998719590269, + "eval_overall_f1": 0.8243430152143846, + "eval_overall_precision": 0.7740259740259741, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.2928, + "eval_samples_per_second": 638.618, + "eval_steps_per_second": 10.245, + "step": 4770 + }, + { + "epoch": 46.0, + "grad_norm": 0.7653511762619019, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.059, + "step": 4876 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.736, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6865671641791045, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8670886075949367, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.13417667150497437, + "eval_overall_accuracy": 0.9649167733674776, + "eval_overall_f1": 0.8326300984528833, + "eval_overall_precision": 0.7935656836461126, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2925, + "eval_samples_per_second": 639.217, + "eval_steps_per_second": 10.255, + "step": 4876 + }, + { + "epoch": 47.0, + "grad_norm": 1.0474615097045898, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0574, + "step": 4982 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.736, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6865671641791045, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7575757575757577, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7796610169491527, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7666666666666667, + "eval_TIME_recall": 0.7931034482758621, + "eval_loss": 0.13553491234779358, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8258426966292135, + "eval_overall_precision": 0.786096256684492, + "eval_overall_recall": 0.8698224852071006, + "eval_runtime": 0.2928, + "eval_samples_per_second": 638.657, + "eval_steps_per_second": 10.246, + "step": 4982 + }, + { + "epoch": 48.0, + "grad_norm": 1.1858253479003906, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.0581, + "step": 5088 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7377049180327868, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.703125, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8670886075949367, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.13520203530788422, + "eval_overall_accuracy": 0.9659411011523688, + "eval_overall_f1": 0.8389830508474576, + "eval_overall_precision": 0.8027027027027027, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2932, + "eval_samples_per_second": 637.868, + "eval_steps_per_second": 10.233, + "step": 5088 + }, + { + "epoch": 49.0, + "grad_norm": 0.4307897090911865, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.0561, + "step": 5194 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.882758620689655, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8421052631578947, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7301587301587301, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6764705882352942, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8679245283018868, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8313253012048193, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.13861991465091705, + "eval_overall_accuracy": 0.9649167733674776, + "eval_overall_f1": 0.8398876404494382, + "eval_overall_precision": 0.7994652406417112, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2889, + "eval_samples_per_second": 647.316, + "eval_steps_per_second": 10.385, + "step": 5194 + }, + { + "epoch": 50.0, + "grad_norm": 0.8664003014564514, + "learning_rate": 2.5e-05, + "loss": 0.0544, + "step": 5300 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.8749999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.84, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.736, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6865671641791045, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8471337579617835, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8209876543209876, + "eval_PERSON_recall": 0.875, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8666666666666667, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.13451269268989563, + "eval_overall_accuracy": 0.9651728553137003, + "eval_overall_f1": 0.8312056737588652, + "eval_overall_precision": 0.7983651226158038, + "eval_overall_recall": 0.8668639053254438, + "eval_runtime": 0.2869, + "eval_samples_per_second": 651.752, + "eval_steps_per_second": 10.456, + "step": 5300 + }, + { + "epoch": 51.0, + "grad_norm": 3.4700815677642822, + "learning_rate": 2.45e-05, + "loss": 0.0534, + "step": 5406 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7692307692307692, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6944444444444444, + "eval_ORGANIZATION_recall": 0.8620689655172413, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.847457627118644, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.13688045740127563, + "eval_overall_accuracy": 0.9651728553137003, + "eval_overall_f1": 0.841514726507714, + "eval_overall_precision": 0.8, + "eval_overall_recall": 0.8875739644970414, + "eval_runtime": 0.2922, + "eval_samples_per_second": 640.064, + "eval_steps_per_second": 10.268, + "step": 5406 + }, + { + "epoch": 52.0, + "grad_norm": 1.2102717161178589, + "learning_rate": 2.4e-05, + "loss": 0.0521, + "step": 5512 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7669172932330827, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.68, + "eval_ORGANIZATION_recall": 0.8793103448275862, + "eval_PERSON_f1": 0.8643533123028391, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.14608342945575714, + "eval_overall_accuracy": 0.9628681177976952, + "eval_overall_f1": 0.8337950138504154, + "eval_overall_precision": 0.7838541666666666, + "eval_overall_recall": 0.8905325443786982, + "eval_runtime": 0.2926, + "eval_samples_per_second": 639.189, + "eval_steps_per_second": 10.254, + "step": 5512 + }, + { + "epoch": 53.0, + "grad_norm": 1.385406732559204, + "learning_rate": 2.35e-05, + "loss": 0.0524, + "step": 5618 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.768, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7164179104477612, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8670886075949367, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1376073956489563, + "eval_overall_accuracy": 0.9656850192061459, + "eval_overall_f1": 0.8410689170182841, + "eval_overall_precision": 0.8016085790884718, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2926, + "eval_samples_per_second": 639.014, + "eval_steps_per_second": 10.252, + "step": 5618 + }, + { + "epoch": 54.0, + "grad_norm": 0.37542441487312317, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0494, + "step": 5724 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.882758620689655, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8421052631578947, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7401574803149606, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6811594202898551, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8670886075949367, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.8125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7647058823529411, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.14096996188163757, + "eval_overall_accuracy": 0.9659411011523688, + "eval_overall_f1": 0.8398876404494382, + "eval_overall_precision": 0.7994652406417112, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2945, + "eval_samples_per_second": 635.027, + "eval_steps_per_second": 10.188, + "step": 5724 + }, + { + "epoch": 55.0, + "grad_norm": 2.3441622257232666, + "learning_rate": 2.25e-05, + "loss": 0.0516, + "step": 5830 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.882758620689655, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8421052631578947, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7559055118110236, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6956521739130435, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8670886075949367, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.8125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7647058823529411, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.13822495937347412, + "eval_overall_accuracy": 0.9659411011523688, + "eval_overall_f1": 0.8455056179775281, + "eval_overall_precision": 0.8048128342245989, + "eval_overall_recall": 0.8905325443786982, + "eval_runtime": 0.2928, + "eval_samples_per_second": 638.657, + "eval_steps_per_second": 10.246, + "step": 5830 + }, + { + "epoch": 56.0, + "grad_norm": 0.9206390976905823, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0497, + "step": 5936 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7200000000000001, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6716417910447762, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8571428571428571, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8282208588957055, + "eval_PERSON_recall": 0.8881578947368421, + "eval_QUANTITY_f1": 0.8125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7647058823529411, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.13933928310871124, + "eval_overall_accuracy": 0.9654289372599232, + "eval_overall_f1": 0.8338028169014083, + "eval_overall_precision": 0.7956989247311828, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.293, + "eval_samples_per_second": 638.123, + "eval_steps_per_second": 10.237, + "step": 5936 + }, + { + "epoch": 57.0, + "grad_norm": 0.8336784839630127, + "learning_rate": 2.15e-05, + "loss": 0.048, + "step": 6042 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.752, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.7014925373134329, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.8125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7647058823529411, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.14417044818401337, + "eval_overall_accuracy": 0.9649167733674776, + "eval_overall_f1": 0.8347338935574229, + "eval_overall_precision": 0.7925531914893617, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.2919, + "eval_samples_per_second": 640.597, + "eval_steps_per_second": 10.277, + "step": 6042 + }, + { + "epoch": 58.0, + "grad_norm": 1.4807782173156738, + "learning_rate": 2.1e-05, + "loss": 0.0474, + "step": 6148 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7480916030534351, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6712328767123288, + "eval_ORGANIZATION_recall": 0.8448275862068966, + "eval_PERSON_f1": 0.8589341692789968, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8203592814371258, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.14203424751758575, + "eval_overall_accuracy": 0.9646606914212548, + "eval_overall_f1": 0.8372739916550764, + "eval_overall_precision": 0.7900262467191601, + "eval_overall_recall": 0.8905325443786982, + "eval_runtime": 0.2926, + "eval_samples_per_second": 639.123, + "eval_steps_per_second": 10.253, + "step": 6148 + }, + { + "epoch": 59.0, + "grad_norm": 1.1692790985107422, + "learning_rate": 2.05e-05, + "loss": 0.0447, + "step": 6254 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6857142857142857, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8643533123028391, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1469813734292984, + "eval_overall_accuracy": 0.9646606914212548, + "eval_overall_f1": 0.8363636363636363, + "eval_overall_precision": 0.7931034482758621, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.293, + "eval_samples_per_second": 638.282, + "eval_steps_per_second": 10.24, + "step": 6254 + }, + { + "epoch": 60.0, + "grad_norm": 0.8121991157531738, + "learning_rate": 2e-05, + "loss": 0.045, + "step": 6360 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7301587301587301, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6764705882352942, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.8125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7647058823529411, + "eval_QUANTITY_recall": 0.8666666666666667, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.14526785910129547, + "eval_overall_accuracy": 0.9649167733674776, + "eval_overall_f1": 0.8330995792426368, + "eval_overall_precision": 0.792, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2933, + "eval_samples_per_second": 637.529, + "eval_steps_per_second": 10.228, + "step": 6360 + }, + { + "epoch": 61.0, + "grad_norm": 0.9558350443840027, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.0441, + "step": 6466 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.8749999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.84, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7244094488188977, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1430891752243042, + "eval_overall_accuracy": 0.9649167733674776, + "eval_overall_f1": 0.8349788434414668, + "eval_overall_precision": 0.7978436657681941, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.286, + "eval_samples_per_second": 653.776, + "eval_steps_per_second": 10.488, + "step": 6466 + }, + { + "epoch": 62.0, + "grad_norm": 1.2547602653503418, + "learning_rate": 1.9e-05, + "loss": 0.0474, + "step": 6572 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.71875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8616352201257862, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8253012048192772, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1453644037246704, + "eval_overall_accuracy": 0.9646606914212548, + "eval_overall_f1": 0.8307692307692308, + "eval_overall_precision": 0.7877984084880637, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.286, + "eval_samples_per_second": 653.787, + "eval_steps_per_second": 10.489, + "step": 6572 + }, + { + "epoch": 63.0, + "grad_norm": 0.2882705628871918, + "learning_rate": 1.85e-05, + "loss": 0.0441, + "step": 6678 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7258064516129032, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6818181818181818, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8706624605678233, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8363636363636363, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.14318965375423431, + "eval_overall_accuracy": 0.9651728553137003, + "eval_overall_f1": 0.8382559774964837, + "eval_overall_precision": 0.7989276139410187, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.2869, + "eval_samples_per_second": 651.819, + "eval_steps_per_second": 10.457, + "step": 6678 + }, + { + "epoch": 64.0, + "grad_norm": 1.046625018119812, + "learning_rate": 1.8e-05, + "loss": 0.0449, + "step": 6784 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.8590604026845639, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7034482758620689, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.5862068965517241, + "eval_ORGANIZATION_recall": 0.8793103448275862, + "eval_PERSON_f1": 0.8598130841121495, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8165680473372781, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.7575757575757577, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6944444444444444, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.16219820082187653, + "eval_overall_accuracy": 0.9590268886043534, + "eval_overall_f1": 0.8178137651821863, + "eval_overall_precision": 0.7518610421836228, + "eval_overall_recall": 0.8964497041420119, + "eval_runtime": 0.2859, + "eval_samples_per_second": 654.058, + "eval_steps_per_second": 10.493, + "step": 6784 + }, + { + "epoch": 65.0, + "grad_norm": 1.8243094682693481, + "learning_rate": 1.75e-05, + "loss": 0.0452, + "step": 6890 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.882758620689655, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8421052631578947, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7424242424242424, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6621621621621622, + "eval_ORGANIZATION_recall": 0.8448275862068966, + "eval_PERSON_f1": 0.85625, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8154761904761905, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.14889490604400635, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8349514563106797, + "eval_overall_precision": 0.7859007832898173, + "eval_overall_recall": 0.8905325443786982, + "eval_runtime": 0.2885, + "eval_samples_per_second": 648.163, + "eval_steps_per_second": 10.398, + "step": 6890 + }, + { + "epoch": 66.0, + "grad_norm": 0.7773322463035583, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0418, + "step": 6996 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7480916030534351, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6712328767123288, + "eval_ORGANIZATION_recall": 0.8448275862068966, + "eval_PERSON_f1": 0.8625, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8214285714285714, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.15430662035942078, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8349514563106797, + "eval_overall_precision": 0.7859007832898173, + "eval_overall_recall": 0.8905325443786982, + "eval_runtime": 0.2923, + "eval_samples_per_second": 639.682, + "eval_steps_per_second": 10.262, + "step": 6996 + }, + { + "epoch": 67.0, + "grad_norm": 0.9151946306228638, + "learning_rate": 1.65e-05, + "loss": 0.0421, + "step": 7102 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7538461538461538, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6805555555555556, + "eval_ORGANIZATION_recall": 0.8448275862068966, + "eval_PERSON_f1": 0.8589341692789968, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8203592814371258, + "eval_PERSON_recall": 0.9013157894736842, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.15378881990909576, + "eval_overall_accuracy": 0.963124199743918, + "eval_overall_f1": 0.8321775312066575, + "eval_overall_precision": 0.783289817232376, + "eval_overall_recall": 0.8875739644970414, + "eval_runtime": 0.2921, + "eval_samples_per_second": 640.251, + "eval_steps_per_second": 10.271, + "step": 7102 + }, + { + "epoch": 68.0, + "grad_norm": 0.4768742322921753, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.041, + "step": 7208 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7518796992481203, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.8620689655172413, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.15490765869617462, + "eval_overall_accuracy": 0.9626120358514725, + "eval_overall_f1": 0.8294036061026352, + "eval_overall_precision": 0.7806788511749347, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2858, + "eval_samples_per_second": 654.293, + "eval_steps_per_second": 10.497, + "step": 7208 + }, + { + "epoch": 69.0, + "grad_norm": 1.6758042573928833, + "learning_rate": 1.55e-05, + "loss": 0.0411, + "step": 7314 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.882758620689655, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8421052631578947, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7401574803149606, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6811594202898551, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8553459119496855, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8192771084337349, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15107131004333496, + "eval_overall_accuracy": 0.9651728553137003, + "eval_overall_f1": 0.8347338935574229, + "eval_overall_precision": 0.7925531914893617, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.2937, + "eval_samples_per_second": 636.611, + "eval_steps_per_second": 10.213, + "step": 7314 + }, + { + "epoch": 70.0, + "grad_norm": 1.7515956163406372, + "learning_rate": 1.5e-05, + "loss": 0.0414, + "step": 7420 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7301587301587301, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6764705882352942, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8625, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8214285714285714, + "eval_PERSON_recall": 0.9078947368421053, + "eval_QUANTITY_f1": 0.746268656716418, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6756756756756757, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.7999999999999999, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.7741935483870968, + "eval_TIME_recall": 0.8275862068965517, + "eval_loss": 0.15384884178638458, + "eval_overall_accuracy": 0.963124199743918, + "eval_overall_f1": 0.8250000000000001, + "eval_overall_precision": 0.7774869109947644, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2872, + "eval_samples_per_second": 651.192, + "eval_steps_per_second": 10.447, + "step": 7420 + }, + { + "epoch": 71.0, + "grad_norm": 1.0327603816986084, + "learning_rate": 1.45e-05, + "loss": 0.0407, + "step": 7526 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7384615384615385, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1484140008687973, + "eval_overall_accuracy": 0.9649167733674776, + "eval_overall_f1": 0.8375350140056023, + "eval_overall_precision": 0.7952127659574468, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2923, + "eval_samples_per_second": 639.759, + "eval_steps_per_second": 10.264, + "step": 7526 + }, + { + "epoch": 72.0, + "grad_norm": 1.021155595779419, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0395, + "step": 7632 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.8749999999999999, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.84, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7419354838709677, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.696969696969697, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.14668720960617065, + "eval_overall_accuracy": 0.9649167733674776, + "eval_overall_f1": 0.8385269121813032, + "eval_overall_precision": 0.8043478260869565, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2936, + "eval_samples_per_second": 636.931, + "eval_steps_per_second": 10.218, + "step": 7632 + }, + { + "epoch": 73.0, + "grad_norm": 0.8261666893959045, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0388, + "step": 7738 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7441860465116279, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.676056338028169, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7936507936507938, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7575757575757576, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.14678049087524414, + "eval_overall_accuracy": 0.9651728553137003, + "eval_overall_f1": 0.8387096774193548, + "eval_overall_precision": 0.7973333333333333, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2943, + "eval_samples_per_second": 635.507, + "eval_steps_per_second": 10.195, + "step": 7738 + }, + { + "epoch": 74.0, + "grad_norm": 1.6623172760009766, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0401, + "step": 7844 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7441860465116279, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.676056338028169, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15321803092956543, + "eval_overall_accuracy": 0.9646606914212548, + "eval_overall_f1": 0.8363636363636363, + "eval_overall_precision": 0.7931034482758621, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2923, + "eval_samples_per_second": 639.734, + "eval_steps_per_second": 10.263, + "step": 7844 + }, + { + "epoch": 75.0, + "grad_norm": 0.745629608631134, + "learning_rate": 1.25e-05, + "loss": 0.0376, + "step": 7950 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.71875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15535052120685577, + "eval_overall_accuracy": 0.9636363636363636, + "eval_overall_f1": 0.829608938547486, + "eval_overall_precision": 0.7857142857142857, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2924, + "eval_samples_per_second": 639.471, + "eval_steps_per_second": 10.259, + "step": 7950 + }, + { + "epoch": 76.0, + "grad_norm": 1.3148008584976196, + "learning_rate": 1.2e-05, + "loss": 0.0389, + "step": 8056 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7480916030534351, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6712328767123288, + "eval_ORGANIZATION_recall": 0.8448275862068966, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15534979104995728, + "eval_overall_accuracy": 0.963124199743918, + "eval_overall_f1": 0.8328690807799444, + "eval_overall_precision": 0.7868421052631579, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2928, + "eval_samples_per_second": 638.704, + "eval_steps_per_second": 10.247, + "step": 8056 + }, + { + "epoch": 77.0, + "grad_norm": 0.5751745104789734, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0385, + "step": 8162 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7538461538461538, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6805555555555556, + "eval_ORGANIZATION_recall": 0.8448275862068966, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1525815725326538, + "eval_overall_accuracy": 0.964404609475032, + "eval_overall_f1": 0.835195530726257, + "eval_overall_precision": 0.791005291005291, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2918, + "eval_samples_per_second": 640.799, + "eval_steps_per_second": 10.28, + "step": 8162 + }, + { + "epoch": 78.0, + "grad_norm": 1.434624195098877, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0375, + "step": 8268 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.882758620689655, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8421052631578947, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7286821705426356, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6619718309859155, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15279057621955872, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.829608938547486, + "eval_overall_precision": 0.7857142857142857, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2926, + "eval_samples_per_second": 639.034, + "eval_steps_per_second": 10.252, + "step": 8268 + }, + { + "epoch": 79.0, + "grad_norm": 0.7510130405426025, + "learning_rate": 1.05e-05, + "loss": 0.0373, + "step": 8374 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7244094488188977, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1517334133386612, + "eval_overall_accuracy": 0.964404609475032, + "eval_overall_f1": 0.8330995792426368, + "eval_overall_precision": 0.792, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2862, + "eval_samples_per_second": 653.312, + "eval_steps_per_second": 10.481, + "step": 8374 + }, + { + "epoch": 80.0, + "grad_norm": 0.686745822429657, + "learning_rate": 1e-05, + "loss": 0.0375, + "step": 8480 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7401574803149606, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6811594202898551, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8553459119496855, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8192771084337349, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15770980715751648, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.829608938547486, + "eval_overall_precision": 0.7857142857142857, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2918, + "eval_samples_per_second": 640.906, + "eval_steps_per_second": 10.282, + "step": 8480 + }, + { + "epoch": 81.0, + "grad_norm": 2.2296547889709473, + "learning_rate": 9.5e-06, + "loss": 0.0371, + "step": 8586 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7384615384615385, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1602996438741684, + "eval_overall_accuracy": 0.963124199743918, + "eval_overall_f1": 0.8317107093184978, + "eval_overall_precision": 0.7847769028871391, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2862, + "eval_samples_per_second": 653.41, + "eval_steps_per_second": 10.483, + "step": 8586 + }, + { + "epoch": 82.0, + "grad_norm": 1.0859251022338867, + "learning_rate": 9e-06, + "loss": 0.0364, + "step": 8692 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7441860465116279, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.676056338028169, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15970273315906525, + "eval_overall_accuracy": 0.9633802816901409, + "eval_overall_f1": 0.8328690807799444, + "eval_overall_precision": 0.7868421052631579, + "eval_overall_recall": 0.8846153846153846, + "eval_runtime": 0.2856, + "eval_samples_per_second": 654.808, + "eval_steps_per_second": 10.505, + "step": 8692 + }, + { + "epoch": 83.0, + "grad_norm": 0.759631872177124, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0355, + "step": 8798 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.71875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15819035470485687, + "eval_overall_accuracy": 0.963124199743918, + "eval_overall_f1": 0.8284518828451882, + "eval_overall_precision": 0.783641160949868, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2921, + "eval_samples_per_second": 640.257, + "eval_steps_per_second": 10.271, + "step": 8798 + }, + { + "epoch": 84.0, + "grad_norm": 3.2821595668792725, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0352, + "step": 8904 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.8648648648648648, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.810126582278481, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7175572519083969, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6438356164383562, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8620689655172413, + "eval_loss": 0.1597450226545334, + "eval_overall_accuracy": 0.9620998719590269, + "eval_overall_f1": 0.8210818307905687, + "eval_overall_precision": 0.7728459530026109, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2916, + "eval_samples_per_second": 641.288, + "eval_steps_per_second": 10.288, + "step": 8904 + }, + { + "epoch": 85.0, + "grad_norm": 1.4578148126602173, + "learning_rate": 7.5e-06, + "loss": 0.0351, + "step": 9010 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.71875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15464067459106445, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8319327731092437, + "eval_overall_precision": 0.7898936170212766, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.291, + "eval_samples_per_second": 642.532, + "eval_steps_per_second": 10.308, + "step": 9010 + }, + { + "epoch": 86.0, + "grad_norm": 3.6410322189331055, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0358, + "step": 9116 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.882758620689655, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8421052631578947, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7131782945736435, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.647887323943662, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15458884835243225, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8319327731092437, + "eval_overall_precision": 0.7898936170212766, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2879, + "eval_samples_per_second": 649.619, + "eval_steps_per_second": 10.422, + "step": 9116 + }, + { + "epoch": 87.0, + "grad_norm": 0.8341395854949951, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0359, + "step": 9222 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7175572519083969, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6438356164383562, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15657885372638702, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8312412831241283, + "eval_overall_precision": 0.7862796833773087, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.2923, + "eval_samples_per_second": 639.721, + "eval_steps_per_second": 10.263, + "step": 9222 + }, + { + "epoch": 88.0, + "grad_norm": 2.221242904663086, + "learning_rate": 6e-06, + "loss": 0.0338, + "step": 9328 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7086614173228347, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6521739130434783, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15500542521476746, + "eval_overall_accuracy": 0.9641485275288092, + "eval_overall_f1": 0.8302945301542777, + "eval_overall_precision": 0.7893333333333333, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2854, + "eval_samples_per_second": 655.275, + "eval_steps_per_second": 10.512, + "step": 9328 + }, + { + "epoch": 89.0, + "grad_norm": 0.7178964018821716, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0324, + "step": 9434 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.703125, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6428571428571429, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15786349773406982, + "eval_overall_accuracy": 0.9633802816901409, + "eval_overall_f1": 0.8256624825662484, + "eval_overall_precision": 0.7810026385224275, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2857, + "eval_samples_per_second": 654.47, + "eval_steps_per_second": 10.5, + "step": 9434 + }, + { + "epoch": 90.0, + "grad_norm": 1.6527904272079468, + "learning_rate": 5e-06, + "loss": 0.034, + "step": 9540 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.71875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15783792734146118, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8268156424581006, + "eval_overall_precision": 0.783068783068783, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2925, + "eval_samples_per_second": 639.316, + "eval_steps_per_second": 10.256, + "step": 9540 + }, + { + "epoch": 91.0, + "grad_norm": 1.0714702606201172, + "learning_rate": 4.5e-06, + "loss": 0.0353, + "step": 9646 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7343749999999999, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6714285714285714, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15797153115272522, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.829608938547486, + "eval_overall_precision": 0.7857142857142857, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.286, + "eval_samples_per_second": 653.836, + "eval_steps_per_second": 10.489, + "step": 9646 + }, + { + "epoch": 92.0, + "grad_norm": 1.2929750680923462, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0325, + "step": 9752 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.71875, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6571428571428571, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.78125, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7352941176470589, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15651264786720276, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8307692307692308, + "eval_overall_precision": 0.7877984084880637, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2929, + "eval_samples_per_second": 638.474, + "eval_steps_per_second": 10.243, + "step": 9752 + }, + { + "epoch": 93.0, + "grad_norm": 0.8345323801040649, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0344, + "step": 9858 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7343749999999999, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6714285714285714, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1590217500925064, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8284518828451882, + "eval_overall_precision": 0.783641160949868, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2919, + "eval_samples_per_second": 640.648, + "eval_steps_per_second": 10.278, + "step": 9858 + }, + { + "epoch": 94.0, + "grad_norm": 0.6701174974441528, + "learning_rate": 3e-06, + "loss": 0.0336, + "step": 9964 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.8571428571428572, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8076923076923077, + "eval_LOCATION_recall": 0.9130434782608695, + "eval_ORGANIZATION_f1": 0.7286821705426356, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6619718309859155, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7058823529411765, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1604832261800766, + "eval_overall_accuracy": 0.9633802816901409, + "eval_overall_f1": 0.8256624825662484, + "eval_overall_precision": 0.7810026385224275, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2864, + "eval_samples_per_second": 652.822, + "eval_steps_per_second": 10.473, + "step": 9964 + }, + { + "epoch": 95.0, + "grad_norm": 0.5713714361190796, + "learning_rate": 2.5e-06, + "loss": 0.0337, + "step": 10070 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7244094488188977, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.16034719347953796, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.829608938547486, + "eval_overall_precision": 0.7857142857142857, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2857, + "eval_samples_per_second": 654.607, + "eval_steps_per_second": 10.502, + "step": 10070 + }, + { + "epoch": 96.0, + "grad_norm": 1.4332071542739868, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0326, + "step": 10176 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.703125, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6428571428571429, + "eval_ORGANIZATION_recall": 0.7758620689655172, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1593874990940094, + "eval_overall_accuracy": 0.9633802816901409, + "eval_overall_f1": 0.8256624825662484, + "eval_overall_precision": 0.7810026385224275, + "eval_overall_recall": 0.8757396449704142, + "eval_runtime": 0.2921, + "eval_samples_per_second": 640.143, + "eval_steps_per_second": 10.27, + "step": 10176 + }, + { + "epoch": 97.0, + "grad_norm": 1.3420923948287964, + "learning_rate": 1.5e-06, + "loss": 0.0352, + "step": 10282 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7244094488188977, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6666666666666666, + "eval_ORGANIZATION_recall": 0.7931034482758621, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7692307692307692, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.7142857142857143, + "eval_QUANTITY_recall": 0.8333333333333334, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15942679345607758, + "eval_overall_accuracy": 0.9641485275288092, + "eval_overall_f1": 0.8307692307692308, + "eval_overall_precision": 0.7877984084880637, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2927, + "eval_samples_per_second": 638.801, + "eval_steps_per_second": 10.248, + "step": 10282 + }, + { + "epoch": 98.0, + "grad_norm": 2.652052402496338, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0329, + "step": 10388 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.8767123287671235, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8311688311688312, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7343749999999999, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6714285714285714, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1578895002603531, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8307692307692308, + "eval_overall_precision": 0.7877984084880637, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2858, + "eval_samples_per_second": 654.385, + "eval_steps_per_second": 10.498, + "step": 10388 + }, + { + "epoch": 99.0, + "grad_norm": 1.3167122602462769, + "learning_rate": 5.000000000000001e-07, + "loss": 0.0321, + "step": 10494 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.75, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6857142857142857, + "eval_ORGANIZATION_recall": 0.8275862068965517, + "eval_PERSON_f1": 0.860759493670886, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.15904921293258667, + "eval_overall_accuracy": 0.9641485275288092, + "eval_overall_f1": 0.8324022346368716, + "eval_overall_precision": 0.7883597883597884, + "eval_overall_recall": 0.8816568047337278, + "eval_runtime": 0.293, + "eval_samples_per_second": 638.168, + "eval_steps_per_second": 10.238, + "step": 10494 + }, + { + "epoch": 100.0, + "grad_norm": 0.26482483744621277, + "learning_rate": 0.0, + "loss": 0.0322, + "step": 10600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.870748299319728, + "eval_LOCATION_number": 69, + "eval_LOCATION_precision": 0.8205128205128205, + "eval_LOCATION_recall": 0.927536231884058, + "eval_ORGANIZATION_f1": 0.7343749999999999, + "eval_ORGANIZATION_number": 58, + "eval_ORGANIZATION_precision": 0.6714285714285714, + "eval_ORGANIZATION_recall": 0.8103448275862069, + "eval_PERSON_f1": 0.8580441640378549, + "eval_PERSON_number": 152, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.8947368421052632, + "eval_QUANTITY_f1": 0.7384615384615385, + "eval_QUANTITY_number": 30, + "eval_QUANTITY_precision": 0.6857142857142857, + "eval_QUANTITY_recall": 0.8, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 29, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.896551724137931, + "eval_loss": 0.1596968173980713, + "eval_overall_accuracy": 0.9638924455825865, + "eval_overall_f1": 0.8284518828451882, + "eval_overall_precision": 0.783641160949868, + "eval_overall_recall": 0.878698224852071, + "eval_runtime": 0.2913, + "eval_samples_per_second": 641.846, + "eval_steps_per_second": 10.297, + "step": 10600 + }, + { + "epoch": 100.0, + "step": 10600, + "total_flos": 4535639530574946.0, + "train_loss": 0.08503569045156803, + "train_runtime": 606.7172, + "train_samples_per_second": 278.054, + "train_steps_per_second": 17.471 + } + ], + "logging_steps": 500, + "max_steps": 10600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 4535639530574946.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}