{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.59092116355896, "learning_rate": 4.9500000000000004e-05, "loss": 1.0611, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.0, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.0, "eval_ORGANIZATION_recall": 0.0, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.6536115407943726, "eval_overall_accuracy": 0.8434547908232118, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, "eval_runtime": 0.2789, "eval_samples_per_second": 609.549, "eval_steps_per_second": 10.757, "step": 96 }, { "epoch": 2.0, "grad_norm": 0.9631377458572388, "learning_rate": 4.9e-05, "loss": 0.6324, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.053475935828877004, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.5555555555555556, "eval_ORGANIZATION_recall": 0.028089887640449437, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.5023020505905151, "eval_overall_accuracy": 0.844804318488529, "eval_overall_f1": 0.024752475247524757, "eval_overall_precision": 0.4166666666666667, "eval_overall_recall": 0.012755102040816327, "eval_runtime": 0.281, "eval_samples_per_second": 604.953, "eval_steps_per_second": 10.676, "step": 192 }, { "epoch": 3.0, "grad_norm": 1.5459109544754028, "learning_rate": 4.85e-05, "loss": 0.4878, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.0425531914893617, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.25, "eval_LOCATION_recall": 0.023255813953488372, "eval_ORGANIZATION_f1": 0.27205882352941174, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.39361702127659576, "eval_ORGANIZATION_recall": 0.20786516853932585, "eval_PERSON_f1": 0.3529411764705882, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.3543307086614173, "eval_PERSON_recall": 0.3515625, "eval_loss": 0.348246306180954, "eval_overall_accuracy": 0.8798920377867746, "eval_overall_f1": 0.2705314009661836, "eval_overall_precision": 0.36681222707423583, "eval_overall_recall": 0.21428571428571427, "eval_runtime": 0.2824, "eval_samples_per_second": 601.961, "eval_steps_per_second": 10.623, "step": 288 }, { "epoch": 4.0, "grad_norm": 0.7495359182357788, "learning_rate": 4.8e-05, "loss": 0.341, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.4, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.5185185185185185, "eval_LOCATION_recall": 0.32558139534883723, "eval_ORGANIZATION_f1": 0.5758354755784062, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.5308056872037915, "eval_ORGANIZATION_recall": 0.6292134831460674, "eval_PERSON_f1": 0.6460481099656357, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.5766871165644172, "eval_PERSON_recall": 0.734375, "eval_loss": 0.23863022029399872, "eval_overall_accuracy": 0.9295546558704454, "eval_overall_f1": 0.5707317073170732, "eval_overall_precision": 0.5467289719626168, "eval_overall_recall": 0.5969387755102041, "eval_runtime": 0.2835, "eval_samples_per_second": 599.753, "eval_steps_per_second": 10.584, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.906302273273468, "learning_rate": 4.75e-05, "loss": 0.2391, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.6829268292682927, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.717948717948718, "eval_LOCATION_recall": 0.6511627906976745, "eval_ORGANIZATION_f1": 0.7131782945736435, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.6602870813397129, "eval_ORGANIZATION_recall": 0.7752808988764045, "eval_PERSON_f1": 0.8686131386861314, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.815068493150685, "eval_PERSON_recall": 0.9296875, "eval_loss": 0.17454874515533447, "eval_overall_accuracy": 0.9546558704453442, "eval_overall_f1": 0.7587878787878788, "eval_overall_precision": 0.7228637413394919, "eval_overall_recall": 0.798469387755102, "eval_runtime": 0.2885, "eval_samples_per_second": 589.304, "eval_steps_per_second": 10.399, "step": 480 }, { "epoch": 6.0, "grad_norm": 1.3750990629196167, "learning_rate": 4.7e-05, "loss": 0.1867, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.7802197802197802, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7395833333333334, "eval_LOCATION_recall": 0.8255813953488372, "eval_ORGANIZATION_f1": 0.772117962466488, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.7384615384615385, "eval_ORGANIZATION_recall": 0.8089887640449438, "eval_PERSON_f1": 0.9393939393939394, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9117647058823529, "eval_PERSON_recall": 0.96875, "eval_loss": 0.1380176693201065, "eval_overall_accuracy": 0.9654520917678813, "eval_overall_f1": 0.8278388278388279, "eval_overall_precision": 0.7939110070257611, "eval_overall_recall": 0.8647959183673469, "eval_runtime": 0.2842, "eval_samples_per_second": 598.123, "eval_steps_per_second": 10.555, "step": 576 }, { "epoch": 7.0, "grad_norm": 1.126783013343811, "learning_rate": 4.6500000000000005e-05, "loss": 0.1578, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.7912087912087912, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.75, "eval_LOCATION_recall": 0.8372093023255814, "eval_ORGANIZATION_f1": 0.8128342245989305, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.7755102040816326, "eval_ORGANIZATION_recall": 0.8539325842696629, "eval_PERSON_f1": 0.9398496240601504, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9057971014492754, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.11496106535196304, "eval_overall_accuracy": 0.9689608636977058, "eval_overall_f1": 0.8491484184914841, "eval_overall_precision": 0.8116279069767441, "eval_overall_recall": 0.8903061224489796, "eval_runtime": 0.2838, "eval_samples_per_second": 598.921, "eval_steps_per_second": 10.569, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.4904704689979553, "learning_rate": 4.600000000000001e-05, "loss": 0.1374, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.8111111111111111, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.776595744680851, "eval_LOCATION_recall": 0.8488372093023255, "eval_ORGANIZATION_f1": 0.8369565217391305, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8105263157894737, "eval_ORGANIZATION_recall": 0.8651685393258427, "eval_PERSON_f1": 0.946969696969697, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9191176470588235, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.09796737134456635, "eval_overall_accuracy": 0.9730094466936572, "eval_overall_f1": 0.8669950738916257, "eval_overall_precision": 0.8380952380952381, "eval_overall_recall": 0.8979591836734694, "eval_runtime": 0.2835, "eval_samples_per_second": 599.64, "eval_steps_per_second": 10.582, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.5990379452705383, "learning_rate": 4.55e-05, "loss": 0.1267, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.8279569892473118, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.77, "eval_LOCATION_recall": 0.8953488372093024, "eval_ORGANIZATION_f1": 0.8743169398907104, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.851063829787234, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9541984732824427, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9328358208955224, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0881563127040863, "eval_overall_accuracy": 0.9748987854251012, "eval_overall_f1": 0.8894348894348895, "eval_overall_precision": 0.8578199052132701, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2839, "eval_samples_per_second": 598.709, "eval_steps_per_second": 10.565, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.9473337531089783, "learning_rate": 4.5e-05, "loss": 0.115, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.858695652173913, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8061224489795918, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8750000000000001, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8473684210526315, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9541984732824427, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9328358208955224, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.08216945081949234, "eval_overall_accuracy": 0.9765182186234818, "eval_overall_f1": 0.8968058968058967, "eval_overall_precision": 0.8649289099526066, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2832, "eval_samples_per_second": 600.342, "eval_steps_per_second": 10.594, "step": 960 }, { "epoch": 11.0, "grad_norm": 1.1330217123031616, "learning_rate": 4.4500000000000004e-05, "loss": 0.1082, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.8306010928961748, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7835051546391752, "eval_LOCATION_recall": 0.8837209302325582, "eval_ORGANIZATION_f1": 0.8681318681318682, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8494623655913979, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07546364516019821, "eval_overall_accuracy": 0.9767881241565453, "eval_overall_f1": 0.8883374689826301, "eval_overall_precision": 0.8647342995169082, "eval_overall_recall": 0.9132653061224489, "eval_runtime": 0.2948, "eval_samples_per_second": 576.643, "eval_steps_per_second": 10.176, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.7142030000686646, "learning_rate": 4.4000000000000006e-05, "loss": 0.1032, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.88268156424581, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8494623655913979, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8858695652173914, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8578947368421053, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.950191570881226, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9323308270676691, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07244797050952911, "eval_overall_accuracy": 0.9781376518218623, "eval_overall_f1": 0.9059405940594059, "eval_overall_precision": 0.8798076923076923, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2869, "eval_samples_per_second": 592.583, "eval_steps_per_second": 10.457, "step": 1152 }, { "epoch": 13.0, "grad_norm": 1.8301559686660767, "learning_rate": 4.35e-05, "loss": 0.0944, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.8977272727272728, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8777777777777778, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8895027624309392, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.875, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.96875, "eval_loss": 0.064577117562294, "eval_overall_accuracy": 0.9800269905533063, "eval_overall_f1": 0.9134253450439147, "eval_overall_precision": 0.8987654320987655, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2882, "eval_samples_per_second": 589.923, "eval_steps_per_second": 10.41, "step": 1248 }, { "epoch": 14.0, "grad_norm": 1.3042991161346436, "learning_rate": 4.3e-05, "loss": 0.0923, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.847457627118644, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8241758241758241, "eval_LOCATION_recall": 0.872093023255814, "eval_ORGANIZATION_f1": 0.8864265927977839, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8743169398907104, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9612403100775193, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9538461538461539, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06380651891231537, "eval_overall_accuracy": 0.979757085020243, "eval_overall_f1": 0.9020100502512562, "eval_overall_precision": 0.8886138613861386, "eval_overall_recall": 0.9158163265306123, "eval_runtime": 0.2837, "eval_samples_per_second": 599.179, "eval_steps_per_second": 10.574, "step": 1344 }, { "epoch": 15.0, "grad_norm": 1.2364366054534912, "learning_rate": 4.25e-05, "loss": 0.0918, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.8813559322033897, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9005524861878453, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8858695652173914, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9689922480620154, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06226590648293495, "eval_overall_accuracy": 0.9805668016194332, "eval_overall_f1": 0.918444165621079, "eval_overall_precision": 0.9037037037037037, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2844, "eval_samples_per_second": 597.73, "eval_steps_per_second": 10.548, "step": 1440 }, { "epoch": 16.0, "grad_norm": 0.7908528447151184, "learning_rate": 4.2e-05, "loss": 0.0848, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.8666666666666666, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8297872340425532, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8839779005524863, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8695652173913043, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.061539795249700546, "eval_overall_accuracy": 0.979757085020243, "eval_overall_f1": 0.9097744360902256, "eval_overall_precision": 0.8940886699507389, "eval_overall_recall": 0.9260204081632653, "eval_runtime": 0.2826, "eval_samples_per_second": 601.652, "eval_steps_per_second": 10.617, "step": 1536 }, { "epoch": 17.0, "grad_norm": 1.4613542556762695, "learning_rate": 4.15e-05, "loss": 0.0818, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.88268156424581, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8494623655913979, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8913649025069637, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8839779005524862, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05940384417772293, "eval_overall_accuracy": 0.9813765182186235, "eval_overall_f1": 0.9157232704402517, "eval_overall_precision": 0.9032258064516129, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.286, "eval_samples_per_second": 594.462, "eval_steps_per_second": 10.491, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.6216811537742615, "learning_rate": 4.1e-05, "loss": 0.0797, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.8914285714285715, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8764044943820225, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8913649025069637, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8839779005524862, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0576835535466671, "eval_overall_accuracy": 0.9813765182186235, "eval_overall_f1": 0.9189873417721519, "eval_overall_precision": 0.9120603015075377, "eval_overall_recall": 0.9260204081632653, "eval_runtime": 0.2839, "eval_samples_per_second": 598.831, "eval_steps_per_second": 10.568, "step": 1728 }, { "epoch": 19.0, "grad_norm": 1.1245906352996826, "learning_rate": 4.05e-05, "loss": 0.0745, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.8863636363636364, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8666666666666667, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8975069252077562, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8852459016393442, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05725981667637825, "eval_overall_accuracy": 0.9813765182186235, "eval_overall_f1": 0.9205548549810846, "eval_overall_precision": 0.9102244389027432, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2848, "eval_samples_per_second": 596.982, "eval_steps_per_second": 10.535, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.51052325963974, "learning_rate": 4e-05, "loss": 0.0747, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.8764044943820224, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8478260869565217, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9030470914127424, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8907103825136612, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05537872388958931, "eval_overall_accuracy": 0.9816464237516869, "eval_overall_f1": 0.9207547169811321, "eval_overall_precision": 0.9081885856079405, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2834, "eval_samples_per_second": 599.843, "eval_steps_per_second": 10.585, "step": 1920 }, { "epoch": 21.0, "grad_norm": 1.993929147720337, "learning_rate": 3.9500000000000005e-05, "loss": 0.0702, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.8715083798882681, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8387096774193549, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8876404494382022, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8876404494382022, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.055962275713682175, "eval_overall_accuracy": 0.9811066126855601, "eval_overall_f1": 0.9127686472819216, "eval_overall_precision": 0.9047619047619048, "eval_overall_recall": 0.9209183673469388, "eval_runtime": 0.2878, "eval_samples_per_second": 590.706, "eval_steps_per_second": 10.424, "step": 2016 }, { "epoch": 22.0, "grad_norm": 1.0807236433029175, "learning_rate": 3.9000000000000006e-05, "loss": 0.0701, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.8729281767955801, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8315789473684211, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9065934065934066, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8870967741935484, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.055046580731868744, "eval_overall_accuracy": 0.9821862348178138, "eval_overall_f1": 0.9213483146067415, "eval_overall_precision": 0.902200488997555, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2838, "eval_samples_per_second": 599.102, "eval_steps_per_second": 10.572, "step": 2112 }, { "epoch": 23.0, "grad_norm": 1.5917891263961792, "learning_rate": 3.85e-05, "loss": 0.0663, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.8764044943820224, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8478260869565217, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9044943820224719, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9044943820224719, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05204461142420769, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.9215189873417722, "eval_overall_precision": 0.914572864321608, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2847, "eval_samples_per_second": 597.205, "eval_steps_per_second": 10.539, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.8088048696517944, "learning_rate": 3.8e-05, "loss": 0.0666, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.861878453038674, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8210526315789474, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8845070422535212, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8870056497175142, "eval_ORGANIZATION_recall": 0.8820224719101124, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.054312530905008316, "eval_overall_accuracy": 0.9805668016194332, "eval_overall_f1": 0.9090909090909091, "eval_overall_precision": 0.9, "eval_overall_recall": 0.9183673469387755, "eval_runtime": 0.2832, "eval_samples_per_second": 600.201, "eval_steps_per_second": 10.592, "step": 2304 }, { "epoch": 25.0, "grad_norm": 0.48308050632476807, "learning_rate": 3.7500000000000003e-05, "loss": 0.0635, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.8729281767955801, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8315789473684211, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8901408450704225, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8926553672316384, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05242611840367317, "eval_overall_accuracy": 0.9821862348178138, "eval_overall_f1": 0.9141414141414141, "eval_overall_precision": 0.905, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.284, "eval_samples_per_second": 598.6, "eval_steps_per_second": 10.564, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.29692307114601135, "learning_rate": 3.7e-05, "loss": 0.0632, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.8839779005524862, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8421052631578947, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.8901408450704225, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8926553672316384, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05194313824176788, "eval_overall_accuracy": 0.9824561403508771, "eval_overall_f1": 0.9166666666666666, "eval_overall_precision": 0.9075, "eval_overall_recall": 0.9260204081632653, "eval_runtime": 0.289, "eval_samples_per_second": 588.272, "eval_steps_per_second": 10.381, "step": 2496 }, { "epoch": 27.0, "grad_norm": 1.3001991510391235, "learning_rate": 3.65e-05, "loss": 0.0596, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.88268156424581, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8494623655913979, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8913649025069637, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8839779005524862, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04886069521307945, "eval_overall_accuracy": 0.982995951417004, "eval_overall_f1": 0.9168765743073048, "eval_overall_precision": 0.9054726368159204, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2848, "eval_samples_per_second": 596.934, "eval_steps_per_second": 10.534, "step": 2592 }, { "epoch": 28.0, "grad_norm": 1.071866512298584, "learning_rate": 3.6e-05, "loss": 0.0608, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.8729281767955801, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8315789473684211, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8901408450704225, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8926553672316384, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05084596574306488, "eval_overall_accuracy": 0.9824561403508771, "eval_overall_f1": 0.9152970922882427, "eval_overall_precision": 0.9072681704260651, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2839, "eval_samples_per_second": 598.86, "eval_steps_per_second": 10.568, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.7404230237007141, "learning_rate": 3.55e-05, "loss": 0.0591, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.9017341040462429, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.896551724137931, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9085872576177285, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8961748633879781, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04637107998132706, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.929113924050633, "eval_overall_precision": 0.9221105527638191, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2854, "eval_samples_per_second": 595.621, "eval_steps_per_second": 10.511, "step": 2784 }, { "epoch": 30.0, "grad_norm": 0.5528072118759155, "learning_rate": 3.5e-05, "loss": 0.0582, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.896551724137931, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8863636363636364, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.925207756232687, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.912568306010929, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04715248569846153, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9355246523388117, "eval_overall_precision": 0.9273182957393483, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2877, "eval_samples_per_second": 590.827, "eval_steps_per_second": 10.426, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.6404902338981628, "learning_rate": 3.45e-05, "loss": 0.0567, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.8791208791208791, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8333333333333334, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.8895184135977336, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8971428571428571, "eval_ORGANIZATION_recall": 0.8820224719101124, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05179203301668167, "eval_overall_accuracy": 0.9824561403508771, "eval_overall_f1": 0.9152970922882427, "eval_overall_precision": 0.9072681704260651, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2853, "eval_samples_per_second": 595.916, "eval_steps_per_second": 10.516, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.6805378794670105, "learning_rate": 3.4000000000000007e-05, "loss": 0.0545, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9055555555555556, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8956043956043956, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0493222177028656, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9255989911727616, "eval_overall_precision": 0.9152119700748129, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2872, "eval_samples_per_second": 591.974, "eval_steps_per_second": 10.447, "step": 3072 }, { "epoch": 33.0, "grad_norm": 0.5877014398574829, "learning_rate": 3.35e-05, "loss": 0.0526, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.8994413407821229, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8944444444444445, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.04881291836500168, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9217171717171717, "eval_overall_precision": 0.9125, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2841, "eval_samples_per_second": 598.433, "eval_steps_per_second": 10.561, "step": 3168 }, { "epoch": 34.0, "grad_norm": 1.083030104637146, "learning_rate": 3.3e-05, "loss": 0.0536, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.898876404494382, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9213483146067416, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9213483146067416, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04811395704746246, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9377382465057178, "eval_overall_precision": 0.9341772151898734, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2853, "eval_samples_per_second": 595.947, "eval_steps_per_second": 10.517, "step": 3264 }, { "epoch": 35.0, "grad_norm": 1.0067145824432373, "learning_rate": 3.2500000000000004e-05, "loss": 0.0501, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.9090909090909092, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04817729443311691, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2851, "eval_samples_per_second": 596.38, "eval_steps_per_second": 10.524, "step": 3360 }, { "epoch": 36.0, "grad_norm": 1.3347820043563843, "learning_rate": 3.2000000000000005e-05, "loss": 0.0541, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.9090909090909092, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9269662921348315, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9269662921348315, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04808411747217178, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9390862944162437, "eval_overall_precision": 0.9343434343434344, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2833, "eval_samples_per_second": 600.141, "eval_steps_per_second": 10.591, "step": 3456 }, { "epoch": 37.0, "grad_norm": 1.8276389837265015, "learning_rate": 3.15e-05, "loss": 0.0513, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.8977272727272728, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8777777777777778, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04752321168780327, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9328263624841572, "eval_overall_precision": 0.9269521410579346, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2841, "eval_samples_per_second": 598.336, "eval_steps_per_second": 10.559, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.28197354078292847, "learning_rate": 3.1e-05, "loss": 0.0506, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.893854748603352, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8602150537634409, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9044943820224719, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9044943820224719, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04833677411079407, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9265822784810125, "eval_overall_precision": 0.9195979899497487, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2848, "eval_samples_per_second": 597.014, "eval_steps_per_second": 10.536, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.9661975502967834, "learning_rate": 3.05e-05, "loss": 0.0483, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.8999999999999999, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8617021276595744, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9213483146067416, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9213483146067416, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04982369393110275, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9355246523388117, "eval_overall_precision": 0.9273182957393483, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2835, "eval_samples_per_second": 599.682, "eval_steps_per_second": 10.583, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.5004177689552307, "learning_rate": 3e-05, "loss": 0.0481, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.9028571428571427, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8876404494382022, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.902506963788301, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8950276243093923, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04674072936177254, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9277566539923955, "eval_overall_precision": 0.9219143576826196, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2852, "eval_samples_per_second": 596.048, "eval_steps_per_second": 10.518, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.16864213347434998, "learning_rate": 2.95e-05, "loss": 0.0463, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.907563025210084, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9050279329608939, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0470806360244751, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.929113924050633, "eval_overall_precision": 0.9221105527638191, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2857, "eval_samples_per_second": 595.08, "eval_steps_per_second": 10.501, "step": 3936 }, { "epoch": 42.0, "grad_norm": 1.2321958541870117, "learning_rate": 2.9e-05, "loss": 0.0461, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9080459770114941, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8977272727272727, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.935933147632312, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9281767955801105, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.04560821130871773, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9416243654822335, "eval_overall_precision": 0.9368686868686869, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2883, "eval_samples_per_second": 589.592, "eval_steps_per_second": 10.405, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.0813780352473259, "learning_rate": 2.8499999999999998e-05, "loss": 0.0454, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9162011173184358, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9111111111111111, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04586649686098099, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9341772151898734, "eval_overall_precision": 0.9271356783919598, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2849, "eval_samples_per_second": 596.667, "eval_steps_per_second": 10.529, "step": 4128 }, { "epoch": 44.0, "grad_norm": 1.1783406734466553, "learning_rate": 2.8000000000000003e-05, "loss": 0.0459, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.935933147632312, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9281767955801105, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04695841670036316, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9444444444444445, "eval_overall_precision": 0.935, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2878, "eval_samples_per_second": 590.607, "eval_steps_per_second": 10.422, "step": 4224 }, { "epoch": 45.0, "grad_norm": 1.3740448951721191, "learning_rate": 2.7500000000000004e-05, "loss": 0.0465, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9295774647887324, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9322033898305084, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04640668258070946, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9390862944162437, "eval_overall_precision": 0.9343434343434344, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2874, "eval_samples_per_second": 591.458, "eval_steps_per_second": 10.437, "step": 4320 }, { "epoch": 46.0, "grad_norm": 2.0288496017456055, "learning_rate": 2.7000000000000002e-05, "loss": 0.0468, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.04828597232699394, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.286, "eval_samples_per_second": 594.412, "eval_steps_per_second": 10.49, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.4440130293369293, "learning_rate": 2.6500000000000004e-05, "loss": 0.0432, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.04774380475282669, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.285, "eval_samples_per_second": 596.405, "eval_steps_per_second": 10.525, "step": 4512 }, { "epoch": 48.0, "grad_norm": 2.703061103820801, "learning_rate": 2.6000000000000002e-05, "loss": 0.0434, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9269662921348315, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9269662921348315, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04498521611094475, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2868, "eval_samples_per_second": 592.719, "eval_steps_per_second": 10.46, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.4383426308631897, "learning_rate": 2.5500000000000003e-05, "loss": 0.0442, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9147727272727272, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9252873563218391, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04639024659991264, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9312977099236641, "eval_overall_precision": 0.9289340101522843, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2853, "eval_samples_per_second": 595.961, "eval_steps_per_second": 10.517, "step": 4704 }, { "epoch": 50.0, "grad_norm": 1.1835976839065552, "learning_rate": 2.5e-05, "loss": 0.0421, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9243697478991597, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9217877094972067, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.047359008342027664, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9341772151898734, "eval_overall_precision": 0.9271356783919598, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2869, "eval_samples_per_second": 592.503, "eval_steps_per_second": 10.456, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.6422563791275024, "learning_rate": 2.45e-05, "loss": 0.0421, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9131652661064427, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9106145251396648, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04615306854248047, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9341772151898734, "eval_overall_precision": 0.9271356783919598, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2879, "eval_samples_per_second": 590.506, "eval_steps_per_second": 10.421, "step": 4896 }, { "epoch": 52.0, "grad_norm": 0.8085091710090637, "learning_rate": 2.4e-05, "loss": 0.0415, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.893854748603352, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8602150537634409, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9106145251396648, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9055555555555556, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.046080444008111954, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9292929292929293, "eval_overall_precision": 0.92, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.285, "eval_samples_per_second": 596.574, "eval_steps_per_second": 10.528, "step": 4992 }, { "epoch": 53.0, "grad_norm": 0.88947594165802, "learning_rate": 2.35e-05, "loss": 0.0418, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.907563025210084, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9050279329608939, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04545211419463158, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9279393173198484, "eval_overall_precision": 0.9197994987468672, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2859, "eval_samples_per_second": 594.675, "eval_steps_per_second": 10.494, "step": 5088 }, { "epoch": 54.0, "grad_norm": 1.4622700214385986, "learning_rate": 2.3000000000000003e-05, "loss": 0.0416, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0450156033039093, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9341772151898734, "eval_overall_precision": 0.9271356783919598, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2849, "eval_samples_per_second": 596.616, "eval_steps_per_second": 10.529, "step": 5184 }, { "epoch": 55.0, "grad_norm": 2.1605515480041504, "learning_rate": 2.25e-05, "loss": 0.0403, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9126760563380282, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9152542372881356, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0455675907433033, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9351969504447268, "eval_overall_precision": 0.9316455696202531, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2843, "eval_samples_per_second": 597.913, "eval_steps_per_second": 10.551, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.6271214485168457, "learning_rate": 2.2000000000000003e-05, "loss": 0.0424, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9195402298850575, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04582790657877922, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9312977099236641, "eval_overall_precision": 0.9289340101522843, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2904, "eval_samples_per_second": 585.489, "eval_steps_per_second": 10.332, "step": 5376 }, { "epoch": 57.0, "grad_norm": 0.5556041598320007, "learning_rate": 2.15e-05, "loss": 0.0391, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9162011173184358, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9111111111111111, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04501010477542877, "eval_overall_accuracy": 0.9867746288798921, "eval_overall_f1": 0.9355246523388117, "eval_overall_precision": 0.9273182957393483, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2858, "eval_samples_per_second": 594.773, "eval_steps_per_second": 10.496, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.8634048104286194, "learning_rate": 2.1e-05, "loss": 0.039, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.046226970851421356, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9328263624841572, "eval_overall_precision": 0.9269521410579346, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2871, "eval_samples_per_second": 592.077, "eval_steps_per_second": 10.448, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.9014930725097656, "learning_rate": 2.05e-05, "loss": 0.0367, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.896358543417367, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8938547486033519, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04571150988340378, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.922882427307206, "eval_overall_precision": 0.9147869674185464, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2851, "eval_samples_per_second": 596.22, "eval_steps_per_second": 10.522, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.6190261840820312, "learning_rate": 2e-05, "loss": 0.0396, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0449722483754158, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.929113924050633, "eval_overall_precision": 0.9221105527638191, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2842, "eval_samples_per_second": 598.214, "eval_steps_per_second": 10.557, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.373140811920166, "learning_rate": 1.9500000000000003e-05, "loss": 0.038, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.045143794268369675, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9390862944162437, "eval_overall_precision": 0.9343434343434344, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2846, "eval_samples_per_second": 597.242, "eval_steps_per_second": 10.54, "step": 5856 }, { "epoch": 62.0, "grad_norm": 0.694780707359314, "learning_rate": 1.9e-05, "loss": 0.0359, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.92090395480226, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9261363636363636, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04511195793747902, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9402795425667091, "eval_overall_precision": 0.9367088607594937, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2842, "eval_samples_per_second": 598.199, "eval_steps_per_second": 10.556, "step": 5952 }, { "epoch": 63.0, "grad_norm": 1.156940221786499, "learning_rate": 1.85e-05, "loss": 0.0366, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9213483146067416, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9213483146067416, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04561036080121994, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9429657794676806, "eval_overall_precision": 0.9370277078085643, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2866, "eval_samples_per_second": 593.14, "eval_steps_per_second": 10.467, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.13566291332244873, "learning_rate": 1.8e-05, "loss": 0.0346, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04518424719572067, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2877, "eval_samples_per_second": 590.965, "eval_steps_per_second": 10.429, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.4561346769332886, "learning_rate": 1.75e-05, "loss": 0.0338, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04571450874209404, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9417721518987343, "eval_overall_precision": 0.9346733668341709, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.285, "eval_samples_per_second": 596.57, "eval_steps_per_second": 10.528, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.8252725601196289, "learning_rate": 1.7000000000000003e-05, "loss": 0.0352, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.045532770454883575, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9302915082382762, "eval_overall_precision": 0.924433249370277, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2857, "eval_samples_per_second": 594.951, "eval_steps_per_second": 10.499, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.3995302617549896, "learning_rate": 1.65e-05, "loss": 0.0351, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9014084507042253, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.903954802259887, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04555808752775192, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9263959390862943, "eval_overall_precision": 0.9217171717171717, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2846, "eval_samples_per_second": 597.271, "eval_steps_per_second": 10.54, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.6343729496002197, "learning_rate": 1.6000000000000003e-05, "loss": 0.0333, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9192200557103065, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9116022099447514, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0462430901825428, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9382093316519546, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2856, "eval_samples_per_second": 595.341, "eval_steps_per_second": 10.506, "step": 6528 }, { "epoch": 69.0, "grad_norm": 1.3333238363265991, "learning_rate": 1.55e-05, "loss": 0.0356, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9039548022598869, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04522768035531044, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.9312977099236641, "eval_overall_precision": 0.9289340101522843, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.285, "eval_samples_per_second": 596.518, "eval_steps_per_second": 10.527, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.09080830216407776, "learning_rate": 1.5e-05, "loss": 0.0336, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9239436619718311, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9265536723163842, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04545467719435692, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2842, "eval_samples_per_second": 598.124, "eval_steps_per_second": 10.555, "step": 6720 }, { "epoch": 71.0, "grad_norm": 1.2310019731521606, "learning_rate": 1.45e-05, "loss": 0.0331, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9050279329608939, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8709677419354839, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9192200557103065, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9116022099447514, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04585807025432587, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.935687263556116, "eval_overall_precision": 0.9251870324189526, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2843, "eval_samples_per_second": 597.996, "eval_steps_per_second": 10.553, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.3007712960243225, "learning_rate": 1.4000000000000001e-05, "loss": 0.0351, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.898876404494382, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.898876404494382, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04686735197901726, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9252217997465146, "eval_overall_precision": 0.9193954659949622, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2838, "eval_samples_per_second": 598.994, "eval_steps_per_second": 10.57, "step": 6912 }, { "epoch": 73.0, "grad_norm": 0.7927353978157043, "learning_rate": 1.3500000000000001e-05, "loss": 0.0333, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.92090395480226, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9261363636363636, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9725490196078432, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9763779527559056, "eval_PERSON_recall": 0.96875, "eval_loss": 0.04656846076250076, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2846, "eval_samples_per_second": 597.346, "eval_steps_per_second": 10.541, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.2926923632621765, "learning_rate": 1.3000000000000001e-05, "loss": 0.0345, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04549047350883484, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9341772151898734, "eval_overall_precision": 0.9271356783919598, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2889, "eval_samples_per_second": 588.524, "eval_steps_per_second": 10.386, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.5930045247077942, "learning_rate": 1.25e-05, "loss": 0.033, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9050279329608939, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8709677419354839, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.92090395480226, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9261363636363636, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04578697308897972, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2858, "eval_samples_per_second": 594.886, "eval_steps_per_second": 10.498, "step": 7200 }, { "epoch": 76.0, "grad_norm": 1.191828966140747, "learning_rate": 1.2e-05, "loss": 0.0334, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9152542372881356, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9204545454545454, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.045504000037908554, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9377382465057178, "eval_overall_precision": 0.9341772151898734, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.284, "eval_samples_per_second": 598.686, "eval_steps_per_second": 10.565, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.300650417804718, "learning_rate": 1.1500000000000002e-05, "loss": 0.0332, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.9050279329608939, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8709677419354839, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9213483146067416, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9213483146067416, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04422719031572342, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9367088607594937, "eval_overall_precision": 0.9296482412060302, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2856, "eval_samples_per_second": 595.156, "eval_steps_per_second": 10.503, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.8257962465286255, "learning_rate": 1.1000000000000001e-05, "loss": 0.0337, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04702431336045265, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9440203562340966, "eval_overall_precision": 0.9416243654822335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2849, "eval_samples_per_second": 596.692, "eval_steps_per_second": 10.53, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.6366257667541504, "learning_rate": 1.05e-05, "loss": 0.0334, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.046494260430336, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9416243654822335, "eval_overall_precision": 0.9368686868686869, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2832, "eval_samples_per_second": 600.206, "eval_steps_per_second": 10.592, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.8169838786125183, "learning_rate": 1e-05, "loss": 0.0319, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9265536723163842, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9318181818181818, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04554834961891174, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9453621346886911, "eval_overall_precision": 0.9417721518987342, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2818, "eval_samples_per_second": 603.258, "eval_steps_per_second": 10.646, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.855148196220398, "learning_rate": 9.5e-06, "loss": 0.032, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9265536723163842, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9318181818181818, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04650643840432167, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9453621346886911, "eval_overall_precision": 0.9417721518987342, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2841, "eval_samples_per_second": 598.476, "eval_steps_per_second": 10.561, "step": 7776 }, { "epoch": 82.0, "grad_norm": 1.168701171875, "learning_rate": 9e-06, "loss": 0.0328, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9131652661064427, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9106145251396648, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.045022863894701004, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9355246523388117, "eval_overall_precision": 0.9273182957393483, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2828, "eval_samples_per_second": 601.135, "eval_steps_per_second": 10.608, "step": 7872 }, { "epoch": 83.0, "grad_norm": 1.8977534770965576, "learning_rate": 8.500000000000002e-06, "loss": 0.032, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9131652661064427, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9106145251396648, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04489699751138687, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9355246523388117, "eval_overall_precision": 0.9273182957393483, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2844, "eval_samples_per_second": 597.649, "eval_steps_per_second": 10.547, "step": 7968 }, { "epoch": 84.0, "grad_norm": 2.0730838775634766, "learning_rate": 8.000000000000001e-06, "loss": 0.0309, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9131652661064427, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9106145251396648, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04511245712637901, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9355246523388117, "eval_overall_precision": 0.9273182957393483, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2839, "eval_samples_per_second": 598.831, "eval_steps_per_second": 10.568, "step": 8064 }, { "epoch": 85.0, "grad_norm": 2.379471778869629, "learning_rate": 7.5e-06, "loss": 0.0315, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9152542372881356, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9204545454545454, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04549684375524521, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9377382465057178, "eval_overall_precision": 0.9341772151898734, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2845, "eval_samples_per_second": 597.627, "eval_steps_per_second": 10.546, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.44879671931266785, "learning_rate": 7.000000000000001e-06, "loss": 0.0305, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9239436619718311, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9265536723163842, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04563186317682266, "eval_overall_accuracy": 0.9878542510121457, "eval_overall_f1": 0.9441624365482234, "eval_overall_precision": 0.9393939393939394, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2843, "eval_samples_per_second": 597.955, "eval_steps_per_second": 10.552, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.4035642147064209, "learning_rate": 6.5000000000000004e-06, "loss": 0.0318, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.045748963952064514, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9416243654822335, "eval_overall_precision": 0.9368686868686869, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2843, "eval_samples_per_second": 597.956, "eval_steps_per_second": 10.552, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.8197627067565918, "learning_rate": 6e-06, "loss": 0.0317, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04592833295464516, "eval_overall_accuracy": 0.9873144399460189, "eval_overall_f1": 0.9416243654822335, "eval_overall_precision": 0.9368686868686869, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2836, "eval_samples_per_second": 599.492, "eval_steps_per_second": 10.579, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.5496847033500671, "learning_rate": 5.500000000000001e-06, "loss": 0.0319, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.92090395480226, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9261363636363636, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04627472907304764, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.9428208386277002, "eval_overall_precision": 0.9392405063291139, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2854, "eval_samples_per_second": 595.745, "eval_steps_per_second": 10.513, "step": 8544 }, { "epoch": 90.0, "grad_norm": 1.0043836832046509, "learning_rate": 5e-06, "loss": 0.0311, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04645315557718277, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9440203562340966, "eval_overall_precision": 0.9416243654822335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2842, "eval_samples_per_second": 598.069, "eval_steps_per_second": 10.554, "step": 8640 }, { "epoch": 91.0, "grad_norm": 1.089453101158142, "learning_rate": 4.5e-06, "loss": 0.0297, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9178470254957507, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9257142857142857, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.046046558767557144, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.94147582697201, "eval_overall_precision": 0.9390862944162437, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2852, "eval_samples_per_second": 596.124, "eval_steps_per_second": 10.52, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.30409517884254456, "learning_rate": 4.000000000000001e-06, "loss": 0.0306, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9178470254957507, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9257142857142857, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.046223461627960205, "eval_overall_accuracy": 0.9875843454790824, "eval_overall_f1": 0.94147582697201, "eval_overall_precision": 0.9390862944162437, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2855, "eval_samples_per_second": 595.468, "eval_steps_per_second": 10.508, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.9712873101234436, "learning_rate": 3.5000000000000004e-06, "loss": 0.0335, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9121813031161473, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.92, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04603765904903412, "eval_overall_accuracy": 0.9870445344129555, "eval_overall_f1": 0.9363867684478372, "eval_overall_precision": 0.934010152284264, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2831, "eval_samples_per_second": 600.474, "eval_steps_per_second": 10.597, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.854708731174469, "learning_rate": 3e-06, "loss": 0.0288, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9265536723163842, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9318181818181818, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04617591202259064, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9453621346886911, "eval_overall_precision": 0.9417721518987342, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2851, "eval_samples_per_second": 596.338, "eval_steps_per_second": 10.524, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.3855931758880615, "learning_rate": 2.5e-06, "loss": 0.0296, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9261363636363636, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9367816091954023, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04585190489888191, "eval_overall_accuracy": 0.9881241565452091, "eval_overall_f1": 0.9452229299363057, "eval_overall_precision": 0.9440203562340967, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2829, "eval_samples_per_second": 600.961, "eval_steps_per_second": 10.605, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.2130727469921112, "learning_rate": 2.0000000000000003e-06, "loss": 0.0317, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04553066939115524, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9440203562340966, "eval_overall_precision": 0.9416243654822335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2849, "eval_samples_per_second": 596.671, "eval_steps_per_second": 10.529, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.7633144855499268, "learning_rate": 1.5e-06, "loss": 0.0298, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0457141175866127, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9440203562340966, "eval_overall_precision": 0.9416243654822335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2846, "eval_samples_per_second": 597.286, "eval_steps_per_second": 10.54, "step": 9312 }, { "epoch": 98.0, "grad_norm": 2.525777816772461, "learning_rate": 1.0000000000000002e-06, "loss": 0.0295, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04562585428357124, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9440203562340966, "eval_overall_precision": 0.9416243654822335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2845, "eval_samples_per_second": 597.545, "eval_steps_per_second": 10.545, "step": 9408 }, { "epoch": 99.0, "grad_norm": 0.7906895279884338, "learning_rate": 5.000000000000001e-07, "loss": 0.0303, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0457700714468956, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9440203562340966, "eval_overall_precision": 0.9416243654822335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2831, "eval_samples_per_second": 600.513, "eval_steps_per_second": 10.597, "step": 9504 }, { "epoch": 100.0, "grad_norm": 1.196627140045166, "learning_rate": 0.0, "loss": 0.0304, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04575352370738983, "eval_overall_accuracy": 0.9883940620782726, "eval_overall_f1": 0.9440203562340966, "eval_overall_precision": 0.9416243654822335, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2864, "eval_samples_per_second": 593.528, "eval_steps_per_second": 10.474, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3894747787130916.0, "train_loss": 0.07688318540652593, "train_runtime": 549.6829, "train_samples_per_second": 278.524, "train_steps_per_second": 17.465 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3894747787130916.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }