{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.4739856719970703, "learning_rate": 4.9500000000000004e-05, "loss": 1.1566, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.0, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.0, "eval_ORGANIZATION_recall": 0.0, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.695199191570282, "eval_overall_accuracy": 0.8373377520022093, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, "eval_runtime": 0.2694, "eval_samples_per_second": 630.973, "eval_steps_per_second": 11.135, "step": 96 }, { "epoch": 2.0, "grad_norm": 0.9800453186035156, "learning_rate": 4.9e-05, "loss": 0.6676, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.0, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.0, "eval_ORGANIZATION_recall": 0.0, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.5652989745140076, "eval_overall_accuracy": 0.8376139188069595, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, "eval_runtime": 0.2734, "eval_samples_per_second": 621.732, "eval_steps_per_second": 10.972, "step": 192 }, { "epoch": 3.0, "grad_norm": 1.1218823194503784, "learning_rate": 4.85e-05, "loss": 0.5559, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.0641711229946524, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.375, "eval_ORGANIZATION_recall": 0.03508771929824561, "eval_PERSON_f1": 0.08588957055214724, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.21875, "eval_PERSON_recall": 0.05343511450381679, "eval_loss": 0.44871625304222107, "eval_overall_accuracy": 0.8456227561447114, "eval_overall_f1": 0.05714285714285715, "eval_overall_precision": 0.26, "eval_overall_recall": 0.03209876543209877, "eval_runtime": 0.2727, "eval_samples_per_second": 623.419, "eval_steps_per_second": 11.002, "step": 288 }, { "epoch": 4.0, "grad_norm": 0.906775176525116, "learning_rate": 4.8e-05, "loss": 0.4455, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.07874015748031495, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.20833333333333334, "eval_LOCATION_recall": 0.04854368932038835, "eval_ORGANIZATION_f1": 0.2807017543859649, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.3508771929824561, "eval_ORGANIZATION_recall": 0.23391812865497075, "eval_PERSON_f1": 0.4098939929328622, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.3815789473684211, "eval_PERSON_recall": 0.44274809160305345, "eval_loss": 0.33893293142318726, "eval_overall_accuracy": 0.8818006075669704, "eval_overall_f1": 0.2964028776978417, "eval_overall_precision": 0.35517241379310344, "eval_overall_recall": 0.254320987654321, "eval_runtime": 0.2771, "eval_samples_per_second": 613.432, "eval_steps_per_second": 10.825, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.6574228405952454, "learning_rate": 4.75e-05, "loss": 0.3416, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.3157894736842105, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.39705882352941174, "eval_LOCATION_recall": 0.2621359223300971, "eval_ORGANIZATION_f1": 0.5245901639344263, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.49230769230769234, "eval_ORGANIZATION_recall": 0.5614035087719298, "eval_PERSON_f1": 0.5847176079734219, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.5176470588235295, "eval_PERSON_recall": 0.6717557251908397, "eval_loss": 0.25828996300697327, "eval_overall_accuracy": 0.9207401270367301, "eval_overall_f1": 0.503579952267303, "eval_overall_precision": 0.48729792147806006, "eval_overall_recall": 0.5209876543209877, "eval_runtime": 0.2793, "eval_samples_per_second": 608.705, "eval_steps_per_second": 10.742, "step": 480 }, { "epoch": 6.0, "grad_norm": 1.1292903423309326, "learning_rate": 4.7e-05, "loss": 0.2637, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.6060606060606061, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.631578947368421, "eval_LOCATION_recall": 0.5825242718446602, "eval_ORGANIZATION_f1": 0.712401055408971, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.6490384615384616, "eval_ORGANIZATION_recall": 0.7894736842105263, "eval_PERSON_f1": 0.767605633802817, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.7124183006535948, "eval_PERSON_recall": 0.8320610687022901, "eval_loss": 0.20057322084903717, "eval_overall_accuracy": 0.9489091411212373, "eval_overall_f1": 0.7061556329849014, "eval_overall_precision": 0.6666666666666666, "eval_overall_recall": 0.7506172839506173, "eval_runtime": 0.2738, "eval_samples_per_second": 620.929, "eval_steps_per_second": 10.958, "step": 576 }, { "epoch": 7.0, "grad_norm": 1.1348835229873657, "learning_rate": 4.6500000000000005e-05, "loss": 0.2115, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.712871287128713, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.7272727272727273, "eval_LOCATION_recall": 0.6990291262135923, "eval_ORGANIZATION_f1": 0.7540106951871658, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.6945812807881774, "eval_ORGANIZATION_recall": 0.8245614035087719, "eval_PERSON_f1": 0.8945454545454545, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.8541666666666666, "eval_PERSON_recall": 0.9389312977099237, "eval_loss": 0.1648775041103363, "eval_overall_accuracy": 0.9585749792874897, "eval_overall_f1": 0.7896592244418331, "eval_overall_precision": 0.7533632286995515, "eval_overall_recall": 0.8296296296296296, "eval_runtime": 0.2742, "eval_samples_per_second": 620.066, "eval_steps_per_second": 10.942, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.5443127751350403, "learning_rate": 4.600000000000001e-05, "loss": 0.1785, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.797979797979798, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8315789473684211, "eval_LOCATION_recall": 0.7669902912621359, "eval_ORGANIZATION_f1": 0.7912087912087912, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7461139896373057, "eval_ORGANIZATION_recall": 0.8421052631578947, "eval_PERSON_f1": 0.929889298892989, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.1342812180519104, "eval_overall_accuracy": 0.9652029826014913, "eval_overall_f1": 0.8379351740696278, "eval_overall_precision": 0.8154205607476636, "eval_overall_recall": 0.8617283950617284, "eval_runtime": 0.2747, "eval_samples_per_second": 618.771, "eval_steps_per_second": 10.919, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.889248788356781, "learning_rate": 4.55e-05, "loss": 0.1541, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.8217821782178217, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8383838383838383, "eval_LOCATION_recall": 0.8058252427184466, "eval_ORGANIZATION_f1": 0.814404432132964, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7736842105263158, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9264705882352942, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.8936170212765957, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.11753819137811661, "eval_overall_accuracy": 0.968240817453742, "eval_overall_f1": 0.852694610778443, "eval_overall_precision": 0.827906976744186, "eval_overall_recall": 0.8790123456790123, "eval_runtime": 0.2808, "eval_samples_per_second": 605.329, "eval_steps_per_second": 10.682, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.8680849075317383, "learning_rate": 4.5e-05, "loss": 0.1387, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.8195121951219512, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8235294117647058, "eval_LOCATION_recall": 0.8155339805825242, "eval_ORGANIZATION_f1": 0.8287292817679557, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7853403141361257, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9304029304029305, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.8943661971830986, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.10947263240814209, "eval_overall_accuracy": 0.9696216514774924, "eval_overall_f1": 0.8595238095238096, "eval_overall_precision": 0.8298850574712644, "eval_overall_recall": 0.891358024691358, "eval_runtime": 0.2731, "eval_samples_per_second": 622.547, "eval_steps_per_second": 10.986, "step": 960 }, { "epoch": 11.0, "grad_norm": 0.9291552901268005, "learning_rate": 4.4500000000000004e-05, "loss": 0.1275, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.8374384236453202, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.85, "eval_LOCATION_recall": 0.8252427184466019, "eval_ORGANIZATION_f1": 0.8333333333333334, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7936507936507936, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.929889298892989, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.09946061670780182, "eval_overall_accuracy": 0.9710024855012428, "eval_overall_f1": 0.8657074340527577, "eval_overall_precision": 0.8414918414918415, "eval_overall_recall": 0.891358024691358, "eval_runtime": 0.277, "eval_samples_per_second": 613.762, "eval_steps_per_second": 10.831, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.6819791793823242, "learning_rate": 4.4000000000000006e-05, "loss": 0.1212, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.8640776699029126, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8640776699029126, "eval_LOCATION_recall": 0.8640776699029126, "eval_ORGANIZATION_f1": 0.837465564738292, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7916666666666666, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.929889298892989, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.09351213276386261, "eval_overall_accuracy": 0.9723833195249931, "eval_overall_f1": 0.8738095238095238, "eval_overall_precision": 0.8436781609195402, "eval_overall_recall": 0.9061728395061729, "eval_runtime": 0.2767, "eval_samples_per_second": 614.42, "eval_steps_per_second": 10.843, "step": 1152 }, { "epoch": 13.0, "grad_norm": 1.4507771730422974, "learning_rate": 4.35e-05, "loss": 0.1164, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.8585365853658536, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8627450980392157, "eval_LOCATION_recall": 0.8543689320388349, "eval_ORGANIZATION_f1": 0.845303867403315, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8010471204188482, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.929889298892989, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08751274645328522, "eval_overall_accuracy": 0.9723833195249931, "eval_overall_f1": 0.8758949880668259, "eval_overall_precision": 0.8475750577367206, "eval_overall_recall": 0.9061728395061729, "eval_runtime": 0.2748, "eval_samples_per_second": 618.687, "eval_steps_per_second": 10.918, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.5297168493270874, "learning_rate": 4.3e-05, "loss": 0.1105, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.8878048780487806, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8921568627450981, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8587896253602305, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8465909090909091, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9438202247191011, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9264705882352942, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08204468339681625, "eval_overall_accuracy": 0.9768019884009942, "eval_overall_f1": 0.8937728937728937, "eval_overall_precision": 0.8840579710144928, "eval_overall_recall": 0.9037037037037037, "eval_runtime": 0.2712, "eval_samples_per_second": 626.816, "eval_steps_per_second": 11.061, "step": 1344 }, { "epoch": 15.0, "grad_norm": 0.8479740023612976, "learning_rate": 4.25e-05, "loss": 0.1063, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.89, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9175257731958762, "eval_LOCATION_recall": 0.8640776699029126, "eval_ORGANIZATION_f1": 0.8446866485013624, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7908163265306123, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9333333333333332, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9064748201438849, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.07925137132406235, "eval_overall_accuracy": 0.9751449875724938, "eval_overall_f1": 0.8841099163679809, "eval_overall_precision": 0.8564814814814815, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2741, "eval_samples_per_second": 620.179, "eval_steps_per_second": 10.944, "step": 1440 }, { "epoch": 16.0, "grad_norm": 1.6623704433441162, "learning_rate": 4.2e-05, "loss": 0.1018, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.8846153846153847, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8761904761904762, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8555858310626703, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8010204081632653, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9333333333333332, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9064748201438849, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.07826149463653564, "eval_overall_accuracy": 0.9748688207677437, "eval_overall_f1": 0.8875739644970415, "eval_overall_precision": 0.8522727272727273, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2734, "eval_samples_per_second": 621.692, "eval_steps_per_second": 10.971, "step": 1536 }, { "epoch": 17.0, "grad_norm": 0.8166274428367615, "learning_rate": 4.15e-05, "loss": 0.0986, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.9019607843137256, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9108910891089109, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8627450980392157, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8279569892473119, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9548872180451129, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9407407407407408, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.07254834473133087, "eval_overall_accuracy": 0.9779066556199945, "eval_overall_f1": 0.9020556227327691, "eval_overall_precision": 0.8838862559241706, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.274, "eval_samples_per_second": 620.343, "eval_steps_per_second": 10.947, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.7182817459106445, "learning_rate": 4.1e-05, "loss": 0.093, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.8921568627450982, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8764044943820224, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8432432432432433, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9473684210526315, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9333333333333333, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.06929948180913925, "eval_overall_accuracy": 0.9779066556199945, "eval_overall_f1": 0.9031476997578692, "eval_overall_precision": 0.8859857482185273, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2726, "eval_samples_per_second": 623.704, "eval_steps_per_second": 11.007, "step": 1728 }, { "epoch": 19.0, "grad_norm": 1.4325016736984253, "learning_rate": 4.05e-05, "loss": 0.0897, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.8846153846153847, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8761904761904762, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8757062146892655, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8469945355191257, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9477611940298507, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.927007299270073, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.06990908831357956, "eval_overall_accuracy": 0.9781828224247445, "eval_overall_f1": 0.9012048192771085, "eval_overall_precision": 0.88, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2715, "eval_samples_per_second": 626.106, "eval_steps_per_second": 11.049, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.7544979453086853, "learning_rate": 4e-05, "loss": 0.0876, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.8888888888888888, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8846153846153846, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8611111111111113, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8201058201058201, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9333333333333332, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9064748201438849, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.06785619258880615, "eval_overall_accuracy": 0.9765258215962441, "eval_overall_f1": 0.8912783751493429, "eval_overall_precision": 0.8634259259259259, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2728, "eval_samples_per_second": 623.147, "eval_steps_per_second": 10.997, "step": 1920 }, { "epoch": 21.0, "grad_norm": 0.9009093046188354, "learning_rate": 3.9500000000000005e-05, "loss": 0.0846, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.8803827751196173, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8679245283018868, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8707865168539327, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8378378378378378, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9402985074626866, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9197080291970803, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.06543691456317902, "eval_overall_accuracy": 0.9784589892294946, "eval_overall_f1": 0.8955582232893157, "eval_overall_precision": 0.8714953271028038, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2743, "eval_samples_per_second": 619.702, "eval_steps_per_second": 10.936, "step": 2016 }, { "epoch": 22.0, "grad_norm": 1.1362422704696655, "learning_rate": 3.9000000000000006e-05, "loss": 0.0843, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.8932038834951457, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8932038834951457, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8784530386740331, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8324607329842932, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.9402985074626866, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9197080291970803, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.06642135977745056, "eval_overall_accuracy": 0.9787351560342447, "eval_overall_f1": 0.9019138755980862, "eval_overall_precision": 0.8747099767981439, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2703, "eval_samples_per_second": 628.899, "eval_steps_per_second": 11.098, "step": 2112 }, { "epoch": 23.0, "grad_norm": 1.139490008354187, "learning_rate": 3.85e-05, "loss": 0.0823, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.8803827751196173, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8679245283018868, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8685714285714285, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8491620111731844, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.06113022193312645, "eval_overall_accuracy": 0.980115990057995, "eval_overall_f1": 0.902676399026764, "eval_overall_precision": 0.8896882494004796, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2712, "eval_samples_per_second": 626.885, "eval_steps_per_second": 11.063, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.9067116975784302, "learning_rate": 3.8e-05, "loss": 0.0808, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.8666666666666667, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8504672897196262, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8700564971751413, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8415300546448088, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0627431645989418, "eval_overall_accuracy": 0.9798398232532449, "eval_overall_f1": 0.8985507246376813, "eval_overall_precision": 0.8794326241134752, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2724, "eval_samples_per_second": 624.125, "eval_steps_per_second": 11.014, "step": 2304 }, { "epoch": 25.0, "grad_norm": 1.520267128944397, "learning_rate": 3.7500000000000003e-05, "loss": 0.0809, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.8792270531400966, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8732394366197183, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.842391304347826, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.05983828753232956, "eval_overall_accuracy": 0.9806683236674951, "eval_overall_f1": 0.9007263922518159, "eval_overall_precision": 0.8836104513064132, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.272, "eval_samples_per_second": 624.952, "eval_steps_per_second": 11.029, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.6365208029747009, "learning_rate": 3.7e-05, "loss": 0.078, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.8921568627450982, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8795518207282913, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8440860215053764, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9509433962264152, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9402985074626866, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.058052726089954376, "eval_overall_accuracy": 0.9817729908864954, "eval_overall_f1": 0.9055690072639224, "eval_overall_precision": 0.8883610451306413, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.273, "eval_samples_per_second": 622.679, "eval_steps_per_second": 10.988, "step": 2496 }, { "epoch": 27.0, "grad_norm": 1.504185676574707, "learning_rate": 3.65e-05, "loss": 0.0774, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.9064039408866995, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.92, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.888268156424581, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8502673796791443, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.9509433962264152, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9402985074626866, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.058188218623399734, "eval_overall_accuracy": 0.9812206572769953, "eval_overall_f1": 0.9128329297820823, "eval_overall_precision": 0.8954869358669834, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2726, "eval_samples_per_second": 623.662, "eval_steps_per_second": 11.006, "step": 2592 }, { "epoch": 28.0, "grad_norm": 0.8313342928886414, "learning_rate": 3.6e-05, "loss": 0.0732, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.8975609756097561, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9019607843137255, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8857142857142858, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8659217877094972, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.062320366501808167, "eval_overall_accuracy": 0.9814968240817453, "eval_overall_f1": 0.9108669108669109, "eval_overall_precision": 0.9009661835748792, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.273, "eval_samples_per_second": 622.639, "eval_steps_per_second": 10.988, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.6814190149307251, "learning_rate": 3.55e-05, "loss": 0.0746, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.9019607843137256, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9108910891089109, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.9028571428571428, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.88268156424581, "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.05526537075638771, "eval_overall_accuracy": 0.9828776581054958, "eval_overall_f1": 0.922888616891065, "eval_overall_precision": 0.9150485436893204, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2751, "eval_samples_per_second": 617.869, "eval_steps_per_second": 10.904, "step": 2784 }, { "epoch": 30.0, "grad_norm": 1.8307719230651855, "learning_rate": 3.5e-05, "loss": 0.0695, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.9019607843137256, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9108910891089109, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.9028571428571428, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.88268156424581, "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.05361052602529526, "eval_overall_accuracy": 0.9831538249102458, "eval_overall_f1": 0.9193154034229828, "eval_overall_precision": 0.910411622276029, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2712, "eval_samples_per_second": 626.924, "eval_steps_per_second": 11.063, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.5288766026496887, "learning_rate": 3.45e-05, "loss": 0.0691, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.8846153846153847, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8761904761904762, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.893371757925072, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8806818181818182, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.05325145646929741, "eval_overall_accuracy": 0.9826014913007457, "eval_overall_f1": 0.9108669108669109, "eval_overall_precision": 0.9009661835748792, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2729, "eval_samples_per_second": 622.963, "eval_steps_per_second": 10.993, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.7085039019584656, "learning_rate": 3.4000000000000007e-05, "loss": 0.0665, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.883495145631068, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.883495145631068, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8825214899713466, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8651685393258427, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.05177022144198418, "eval_overall_accuracy": 0.9823253244959956, "eval_overall_f1": 0.905982905982906, "eval_overall_precision": 0.8961352657004831, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2709, "eval_samples_per_second": 627.553, "eval_steps_per_second": 11.074, "step": 3072 }, { "epoch": 33.0, "grad_norm": 0.6961383819580078, "learning_rate": 3.35e-05, "loss": 0.0649, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.8598130841121495, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8288288288288288, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.052676014602184296, "eval_overall_accuracy": 0.9809444904722452, "eval_overall_f1": 0.8990147783251232, "eval_overall_precision": 0.8968058968058968, "eval_overall_recall": 0.9012345679012346, "eval_runtime": 0.273, "eval_samples_per_second": 622.752, "eval_steps_per_second": 10.99, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.3737340569496155, "learning_rate": 3.3e-05, "loss": 0.0645, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.883495145631068, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.883495145631068, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.9043478260869565, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.896551724137931, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.05056975781917572, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9153374233128834, "eval_overall_precision": 0.9097560975609756, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2748, "eval_samples_per_second": 618.671, "eval_steps_per_second": 10.918, "step": 3264 }, { "epoch": 35.0, "grad_norm": 1.4867804050445557, "learning_rate": 3.2500000000000004e-05, "loss": 0.063, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.8666666666666667, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8504672897196262, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8888888888888888, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8888888888888888, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.051477495580911636, "eval_overall_accuracy": 0.9826014913007457, "eval_overall_f1": 0.9044117647058824, "eval_overall_precision": 0.8978102189781022, "eval_overall_recall": 0.9111111111111111, "eval_runtime": 0.2727, "eval_samples_per_second": 623.41, "eval_steps_per_second": 11.001, "step": 3360 }, { "epoch": 36.0, "grad_norm": 0.9459262490272522, "learning_rate": 3.2000000000000005e-05, "loss": 0.0637, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.8666666666666667, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8504672897196262, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8830409356725146, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8830409356725146, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.050841640681028366, "eval_overall_accuracy": 0.9817729908864954, "eval_overall_f1": 0.9019607843137256, "eval_overall_precision": 0.8953771289537713, "eval_overall_recall": 0.908641975308642, "eval_runtime": 0.2727, "eval_samples_per_second": 623.352, "eval_steps_per_second": 11.0, "step": 3456 }, { "epoch": 37.0, "grad_norm": 2.0613455772399902, "learning_rate": 3.15e-05, "loss": 0.0614, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.8921568627450982, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8977272727272728, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8729281767955801, "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.049506776034832, "eval_overall_accuracy": 0.9828776581054958, "eval_overall_f1": 0.9146341463414634, "eval_overall_precision": 0.9036144578313253, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2728, "eval_samples_per_second": 623.161, "eval_steps_per_second": 10.997, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.5442711710929871, "learning_rate": 3.1e-05, "loss": 0.0599, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.8708133971291866, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8584905660377359, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8982035928143712, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.04945772886276245, "eval_overall_accuracy": 0.9820491576912456, "eval_overall_f1": 0.9050554870530209, "eval_overall_precision": 0.9039408866995073, "eval_overall_recall": 0.9061728395061729, "eval_runtime": 0.2707, "eval_samples_per_second": 628.033, "eval_steps_per_second": 11.083, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.5759150385856628, "learning_rate": 3.05e-05, "loss": 0.06, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.8720379146919431, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8518518518518519, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.877906976744186, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8728323699421965, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0495310053229332, "eval_overall_accuracy": 0.9820491576912456, "eval_overall_f1": 0.9057527539779682, "eval_overall_precision": 0.8980582524271845, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.277, "eval_samples_per_second": 613.735, "eval_steps_per_second": 10.831, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.95399409532547, "learning_rate": 3e-05, "loss": 0.0576, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.8750000000000001, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8666666666666667, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8941176470588235, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8994082840236687, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04800355061888695, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.9124537607891491, "eval_overall_precision": 0.9113300492610837, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2713, "eval_samples_per_second": 626.595, "eval_steps_per_second": 11.058, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.808504581451416, "learning_rate": 2.95e-05, "loss": 0.0597, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.8803827751196173, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8679245283018868, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8876080691642652, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.875, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.048507072031497955, "eval_overall_accuracy": 0.9828776581054958, "eval_overall_f1": 0.9108669108669109, "eval_overall_precision": 0.9009661835748792, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2721, "eval_samples_per_second": 624.82, "eval_steps_per_second": 11.026, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.5790802240371704, "learning_rate": 2.9e-05, "loss": 0.0581, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.8761904761904761, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8598130841121495, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.881159420289855, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8735632183908046, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04726165160536766, "eval_overall_accuracy": 0.9828776581054958, "eval_overall_f1": 0.9070904645476773, "eval_overall_precision": 0.8983050847457628, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2765, "eval_samples_per_second": 614.808, "eval_steps_per_second": 10.85, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.46468445658683777, "learning_rate": 2.8499999999999998e-05, "loss": 0.0597, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.8803827751196173, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8679245283018868, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.881159420289855, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8735632183908046, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.047872934490442276, "eval_overall_accuracy": 0.9826014913007457, "eval_overall_f1": 0.9093137254901962, "eval_overall_precision": 0.902676399026764, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2722, "eval_samples_per_second": 624.585, "eval_steps_per_second": 11.022, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.5926026105880737, "learning_rate": 2.8000000000000003e-05, "loss": 0.0568, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.8720379146919431, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8518518518518519, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8982035928143712, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04811384528875351, "eval_overall_accuracy": 0.9826014913007457, "eval_overall_f1": 0.9088669950738916, "eval_overall_precision": 0.9066339066339066, "eval_overall_recall": 0.9111111111111111, "eval_runtime": 0.2715, "eval_samples_per_second": 626.105, "eval_steps_per_second": 11.049, "step": 4224 }, { "epoch": 45.0, "grad_norm": 1.227881669998169, "learning_rate": 2.7500000000000004e-05, "loss": 0.0561, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.8720379146919431, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8518518518518519, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8823529411764705, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8875739644970414, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04695962369441986, "eval_overall_accuracy": 0.9834299917149959, "eval_overall_f1": 0.9077490774907748, "eval_overall_precision": 0.9044117647058824, "eval_overall_recall": 0.9111111111111111, "eval_runtime": 0.2758, "eval_samples_per_second": 616.422, "eval_steps_per_second": 10.878, "step": 4320 }, { "epoch": 46.0, "grad_norm": 1.023648738861084, "learning_rate": 2.7000000000000002e-05, "loss": 0.0552, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.8720379146919431, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8518518518518519, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.890207715133531, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9036144578313253, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04779404029250145, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.9111111111111111, "eval_overall_precision": 0.9111111111111111, "eval_overall_recall": 0.9111111111111111, "eval_runtime": 0.2711, "eval_samples_per_second": 627.02, "eval_steps_per_second": 11.065, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.36909419298171997, "learning_rate": 2.6500000000000004e-05, "loss": 0.0562, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.8846153846153847, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8761904761904762, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.8793103448275862, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.864406779661017, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.046062689274549484, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9095354523227382, "eval_overall_precision": 0.9007263922518159, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.272, "eval_samples_per_second": 624.965, "eval_steps_per_second": 11.029, "step": 4512 }, { "epoch": 48.0, "grad_norm": 0.6459914445877075, "learning_rate": 2.6000000000000002e-05, "loss": 0.0533, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.8826291079812206, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8545454545454545, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9085365853658537, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04743105545639992, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.9135802469135802, "eval_overall_precision": 0.9135802469135802, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2736, "eval_samples_per_second": 621.325, "eval_steps_per_second": 10.965, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.17258469760417938, "learning_rate": 2.5500000000000003e-05, "loss": 0.0522, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.8785046728971964, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8468468468468469, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8771929824561403, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8771929824561403, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04609883204102516, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9070904645476773, "eval_overall_precision": 0.8983050847457628, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2713, "eval_samples_per_second": 626.665, "eval_steps_per_second": 11.059, "step": 4704 }, { "epoch": 50.0, "grad_norm": 1.7320947647094727, "learning_rate": 2.5e-05, "loss": 0.052, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.8878504672897196, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8558558558558559, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8869565217391304, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8793103448275862, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04637792333960533, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9135200974421438, "eval_overall_precision": 0.9014423076923077, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.271, "eval_samples_per_second": 627.338, "eval_steps_per_second": 11.071, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.8508228063583374, "learning_rate": 2.45e-05, "loss": 0.054, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.8930232558139534, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8869047619047619, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9030303030303031, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.046650972217321396, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.915129151291513, "eval_overall_precision": 0.9117647058823529, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2718, "eval_samples_per_second": 625.424, "eval_steps_per_second": 11.037, "step": 4896 }, { "epoch": 52.0, "grad_norm": 0.6740456223487854, "learning_rate": 2.4e-05, "loss": 0.0525, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.9158878504672898, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8828828828828829, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.872093023255814, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8670520231213873, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04597054049372673, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9146341463414634, "eval_overall_precision": 0.9036144578313253, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2723, "eval_samples_per_second": 624.335, "eval_steps_per_second": 11.018, "step": 4992 }, { "epoch": 53.0, "grad_norm": 1.4370176792144775, "learning_rate": 2.35e-05, "loss": 0.0501, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8816568047337278, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8922155688622755, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04658225178718567, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9164619164619164, "eval_overall_precision": 0.9119804400977995, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2733, "eval_samples_per_second": 622.121, "eval_steps_per_second": 10.979, "step": 5088 }, { "epoch": 54.0, "grad_norm": 1.064245343208313, "learning_rate": 2.3000000000000003e-05, "loss": 0.0498, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.8962264150943398, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8715596330275229, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.880466472303207, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.877906976744186, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04490743577480316, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9130966952264382, "eval_overall_precision": 0.9053398058252428, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2734, "eval_samples_per_second": 621.817, "eval_steps_per_second": 10.973, "step": 5184 }, { "epoch": 55.0, "grad_norm": 2.0687813758850098, "learning_rate": 2.25e-05, "loss": 0.0504, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8982035928143712, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045612651854753494, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.918918918918919, "eval_overall_precision": 0.9144254278728606, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2766, "eval_samples_per_second": 614.623, "eval_steps_per_second": 10.846, "step": 5280 }, { "epoch": 56.0, "grad_norm": 1.0828304290771484, "learning_rate": 2.2000000000000003e-05, "loss": 0.0486, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.881159420289855, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8735632183908046, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045335978269577026, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9146341463414634, "eval_overall_precision": 0.9036144578313253, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2731, "eval_samples_per_second": 622.495, "eval_steps_per_second": 10.985, "step": 5376 }, { "epoch": 57.0, "grad_norm": 1.297290563583374, "learning_rate": 2.15e-05, "loss": 0.0497, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.8940092165898617, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8508771929824561, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8982035928143712, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045681606978178024, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9155446756425949, "eval_overall_precision": 0.9077669902912622, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2721, "eval_samples_per_second": 624.822, "eval_steps_per_second": 11.026, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.472746878862381, "learning_rate": 2.1e-05, "loss": 0.0487, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.890207715133531, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9036144578313253, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04600377753376961, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.920049200492005, "eval_overall_precision": 0.9166666666666666, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2747, "eval_samples_per_second": 618.786, "eval_steps_per_second": 10.92, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.6368147730827332, "learning_rate": 2.05e-05, "loss": 0.0473, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.8878504672897196, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8558558558558559, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8816568047337278, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8922155688622755, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045563556253910065, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9115479115479117, "eval_overall_precision": 0.9070904645476773, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2709, "eval_samples_per_second": 627.644, "eval_steps_per_second": 11.076, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.35717150568962097, "learning_rate": 2e-05, "loss": 0.0463, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.8878504672897196, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8558558558558559, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8809523809523809, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.896969696969697, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04541854187846184, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9113300492610836, "eval_overall_precision": 0.9090909090909091, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2737, "eval_samples_per_second": 621.148, "eval_steps_per_second": 10.961, "step": 5760 }, { "epoch": 61.0, "grad_norm": 1.0991231203079224, "learning_rate": 1.9500000000000003e-05, "loss": 0.0486, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8809523809523809, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.896969696969697, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045604679733514786, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.916256157635468, "eval_overall_precision": 0.914004914004914, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2735, "eval_samples_per_second": 621.534, "eval_steps_per_second": 10.968, "step": 5856 }, { "epoch": 62.0, "grad_norm": 1.2870984077453613, "learning_rate": 1.9e-05, "loss": 0.0484, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.8930232558139534, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8809523809523809, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.896969696969697, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04645540565252304, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9126691266912669, "eval_overall_precision": 0.9093137254901961, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2716, "eval_samples_per_second": 626.017, "eval_steps_per_second": 11.047, "step": 5952 }, { "epoch": 63.0, "grad_norm": 1.264487862586975, "learning_rate": 1.85e-05, "loss": 0.0461, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.9116279069767441, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8908554572271385, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8988095238095238, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04506193846464157, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9215686274509803, "eval_overall_precision": 0.9148418491484185, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2722, "eval_samples_per_second": 624.428, "eval_steps_per_second": 11.019, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.35966232419013977, "learning_rate": 1.8e-05, "loss": 0.0455, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8842729970326408, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8975903614457831, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04505002498626709, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.916256157635468, "eval_overall_precision": 0.914004914004914, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2717, "eval_samples_per_second": 625.717, "eval_steps_per_second": 11.042, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.5010517239570618, "learning_rate": 1.75e-05, "loss": 0.0472, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8869047619047619, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9030303030303031, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04529740288853645, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.91871921182266, "eval_overall_precision": 0.9164619164619164, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2726, "eval_samples_per_second": 623.611, "eval_steps_per_second": 11.005, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.859156608581543, "learning_rate": 1.7000000000000003e-05, "loss": 0.0453, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9146341463414634, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04506004601716995, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9223181257706535, "eval_overall_precision": 0.9211822660098522, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.273, "eval_samples_per_second": 622.688, "eval_steps_per_second": 10.989, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.3856222927570343, "learning_rate": 1.65e-05, "loss": 0.045, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9085365853658537, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04503821209073067, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9198520345252774, "eval_overall_precision": 0.9187192118226601, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2725, "eval_samples_per_second": 623.806, "eval_steps_per_second": 11.008, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.5940878987312317, "learning_rate": 1.6000000000000003e-05, "loss": 0.0466, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.890207715133531, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9036144578313253, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044038835912942886, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9236453201970444, "eval_overall_precision": 0.9213759213759214, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2728, "eval_samples_per_second": 623.229, "eval_steps_per_second": 10.998, "step": 6528 }, { "epoch": 69.0, "grad_norm": 1.601568579673767, "learning_rate": 1.55e-05, "loss": 0.046, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.8971962616822431, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8648648648648649, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9085365853658537, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04461726173758507, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9173859432799014, "eval_overall_precision": 0.916256157635468, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2738, "eval_samples_per_second": 620.821, "eval_steps_per_second": 10.956, "step": 6624 }, { "epoch": 70.0, "grad_norm": 2.521439552307129, "learning_rate": 1.5e-05, "loss": 0.0436, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.8971962616822431, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8648648648648649, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04403162747621536, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.91871921182266, "eval_overall_precision": 0.9164619164619164, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.275, "eval_samples_per_second": 618.275, "eval_steps_per_second": 10.911, "step": 6720 }, { "epoch": 71.0, "grad_norm": 0.8414034247398376, "learning_rate": 1.45e-05, "loss": 0.0445, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9074074074074074, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8672566371681416, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8908554572271385, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8988095238095238, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0446244440972805, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9204406364749083, "eval_overall_precision": 0.912621359223301, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2729, "eval_samples_per_second": 623.038, "eval_steps_per_second": 10.995, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.7483597993850708, "learning_rate": 1.4000000000000001e-05, "loss": 0.0437, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8835820895522388, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9024390243902439, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04594691842794418, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9160493827160494, "eval_overall_precision": 0.9160493827160494, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2711, "eval_samples_per_second": 627.021, "eval_steps_per_second": 11.065, "step": 6912 }, { "epoch": 73.0, "grad_norm": 1.4202793836593628, "learning_rate": 1.3500000000000001e-05, "loss": 0.0434, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9116279069767441, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0443597137928009, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.922509225092251, "eval_overall_precision": 0.9191176470588235, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2743, "eval_samples_per_second": 619.819, "eval_steps_per_second": 10.938, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.6511960625648499, "learning_rate": 1.3000000000000001e-05, "loss": 0.0441, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044465843588113785, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9211822660098522, "eval_overall_precision": 0.918918918918919, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2712, "eval_samples_per_second": 626.86, "eval_steps_per_second": 11.062, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.9006913900375366, "learning_rate": 1.25e-05, "loss": 0.0439, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04460127279162407, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9211822660098522, "eval_overall_precision": 0.918918918918919, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2727, "eval_samples_per_second": 623.458, "eval_steps_per_second": 11.002, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.4795643389225006, "learning_rate": 1.2e-05, "loss": 0.042, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8660714285714286, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8982035928143712, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044701043516397476, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9177914110429448, "eval_overall_precision": 0.9121951219512195, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2713, "eval_samples_per_second": 626.658, "eval_steps_per_second": 11.059, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.5350348949432373, "learning_rate": 1.1500000000000002e-05, "loss": 0.0428, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.8971962616822431, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8648648648648649, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9085365853658537, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04485779628157616, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9173859432799014, "eval_overall_precision": 0.916256157635468, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2726, "eval_samples_per_second": 623.573, "eval_steps_per_second": 11.004, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.3859386444091797, "learning_rate": 1.1000000000000001e-05, "loss": 0.0435, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044405534863471985, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9211822660098522, "eval_overall_precision": 0.918918918918919, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2722, "eval_samples_per_second": 624.535, "eval_steps_per_second": 11.021, "step": 7488 }, { "epoch": 79.0, "grad_norm": 1.7985838651657104, "learning_rate": 1.05e-05, "loss": 0.0416, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8660714285714286, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8994082840236686, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9101796407185628, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04388272389769554, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9226993865030675, "eval_overall_precision": 0.9170731707317074, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2714, "eval_samples_per_second": 626.402, "eval_steps_per_second": 11.054, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.8757291436195374, "learning_rate": 1e-05, "loss": 0.0414, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8961424332344213, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9096385542168675, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04357600957155228, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9211822660098522, "eval_overall_precision": 0.918918918918919, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2731, "eval_samples_per_second": 622.407, "eval_steps_per_second": 10.984, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.4684378504753113, "learning_rate": 9.5e-06, "loss": 0.043, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043720223009586334, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9223181257706535, "eval_overall_precision": 0.9211822660098522, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2724, "eval_samples_per_second": 624.084, "eval_steps_per_second": 11.013, "step": 7776 }, { "epoch": 82.0, "grad_norm": 0.9328840970993042, "learning_rate": 9e-06, "loss": 0.0433, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9020771513353115, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04343878850340843, "eval_overall_accuracy": 0.9872963269814968, "eval_overall_f1": 0.9261083743842363, "eval_overall_precision": 0.9238329238329238, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2728, "eval_samples_per_second": 623.154, "eval_steps_per_second": 10.997, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.808896005153656, "learning_rate": 8.500000000000002e-06, "loss": 0.0428, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8660714285714286, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9047619047619047, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9212121212121213, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043899569660425186, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9249692496924968, "eval_overall_precision": 0.9215686274509803, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2716, "eval_samples_per_second": 625.96, "eval_steps_per_second": 11.046, "step": 7968 }, { "epoch": 84.0, "grad_norm": 0.7827540040016174, "learning_rate": 8.000000000000001e-06, "loss": 0.0418, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9020771513353115, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04345087707042694, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9249692496924968, "eval_overall_precision": 0.9215686274509803, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2707, "eval_samples_per_second": 628.109, "eval_steps_per_second": 11.084, "step": 8064 }, { "epoch": 85.0, "grad_norm": 0.7387497425079346, "learning_rate": 7.5e-06, "loss": 0.0416, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04350125417113304, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9223181257706535, "eval_overall_precision": 0.9211822660098522, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2731, "eval_samples_per_second": 622.486, "eval_steps_per_second": 10.985, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.7260624170303345, "learning_rate": 7.000000000000001e-06, "loss": 0.0413, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9047619047619047, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9212121212121213, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04387590289115906, "eval_overall_accuracy": 0.9872963269814968, "eval_overall_f1": 0.9261083743842363, "eval_overall_precision": 0.9238329238329238, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2733, "eval_samples_per_second": 622.03, "eval_steps_per_second": 10.977, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.7930686473846436, "learning_rate": 6.5000000000000004e-06, "loss": 0.0423, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9085365853658537, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04400918632745743, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9185185185185185, "eval_overall_precision": 0.9185185185185185, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2715, "eval_samples_per_second": 626.246, "eval_steps_per_second": 11.051, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.6405452489852905, "learning_rate": 6e-06, "loss": 0.0409, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9146341463414634, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04390283301472664, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9234567901234567, "eval_overall_precision": 0.9234567901234568, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2725, "eval_samples_per_second": 623.922, "eval_steps_per_second": 11.01, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.8073258996009827, "learning_rate": 5.500000000000001e-06, "loss": 0.0419, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8660714285714286, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9047619047619047, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9212121212121213, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043653201311826706, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9249692496924968, "eval_overall_precision": 0.9215686274509803, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.271, "eval_samples_per_second": 627.261, "eval_steps_per_second": 11.069, "step": 8544 }, { "epoch": 90.0, "grad_norm": 0.8635096549987793, "learning_rate": 5e-06, "loss": 0.0424, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8660714285714286, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043814968317747116, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.922509225092251, "eval_overall_precision": 0.9191176470588235, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2729, "eval_samples_per_second": 622.855, "eval_steps_per_second": 10.992, "step": 8640 }, { "epoch": 91.0, "grad_norm": 0.7420637607574463, "learning_rate": 4.5e-06, "loss": 0.0419, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9146341463414634, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04388648644089699, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9223181257706535, "eval_overall_precision": 0.9211822660098522, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2731, "eval_samples_per_second": 622.382, "eval_steps_per_second": 10.983, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.2291034311056137, "learning_rate": 4.000000000000001e-06, "loss": 0.0427, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9146341463414634, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044274233281612396, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9223181257706535, "eval_overall_precision": 0.9211822660098522, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2719, "eval_samples_per_second": 625.194, "eval_steps_per_second": 11.033, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.9127195477485657, "learning_rate": 3.5000000000000004e-06, "loss": 0.0397, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9146341463414634, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04379863291978836, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9223181257706535, "eval_overall_precision": 0.9211822660098522, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2732, "eval_samples_per_second": 622.226, "eval_steps_per_second": 10.98, "step": 8928 }, { "epoch": 94.0, "grad_norm": 1.1270931959152222, "learning_rate": 3e-06, "loss": 0.0414, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043688420206308365, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9236453201970444, "eval_overall_precision": 0.9213759213759214, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2716, "eval_samples_per_second": 625.967, "eval_steps_per_second": 11.046, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.35525602102279663, "learning_rate": 2.5e-06, "loss": 0.0401, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043763358145952225, "eval_overall_accuracy": 0.9872963269814968, "eval_overall_f1": 0.9247842170160296, "eval_overall_precision": 0.9236453201970444, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2719, "eval_samples_per_second": 625.255, "eval_steps_per_second": 11.034, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.21041779220104218, "learning_rate": 2.0000000000000003e-06, "loss": 0.0415, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8955223880597015, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9146341463414634, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043890830129384995, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9209876543209876, "eval_overall_precision": 0.9209876543209876, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2726, "eval_samples_per_second": 623.718, "eval_steps_per_second": 11.007, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.8473381996154785, "learning_rate": 1.5e-06, "loss": 0.0404, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043740756809711456, "eval_overall_accuracy": 0.9872963269814968, "eval_overall_f1": 0.9247842170160296, "eval_overall_precision": 0.9236453201970444, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2732, "eval_samples_per_second": 622.321, "eval_steps_per_second": 10.982, "step": 9312 }, { "epoch": 98.0, "grad_norm": 1.3456790447235107, "learning_rate": 1.0000000000000002e-06, "loss": 0.0418, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04376491159200668, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9236453201970444, "eval_overall_precision": 0.9213759213759214, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2717, "eval_samples_per_second": 625.746, "eval_steps_per_second": 11.043, "step": 9408 }, { "epoch": 99.0, "grad_norm": 2.012141704559326, "learning_rate": 5.000000000000001e-07, "loss": 0.0388, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043746430426836014, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9236453201970444, "eval_overall_precision": 0.9213759213759214, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2735, "eval_samples_per_second": 621.47, "eval_steps_per_second": 10.967, "step": 9504 }, { "epoch": 100.0, "grad_norm": 0.3253363370895386, "learning_rate": 0.0, "loss": 0.0397, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9065420560747663, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8738738738738738, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04369127005338669, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9236453201970444, "eval_overall_precision": 0.9213759213759214, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2733, "eval_samples_per_second": 622.123, "eval_steps_per_second": 10.979, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3889996718638620.0, "train_loss": 0.09361960942546527, "train_runtime": 547.4358, "train_samples_per_second": 279.667, "train_steps_per_second": 17.536 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3889996718638620.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }