{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.476663112640381, "learning_rate": 4.9500000000000004e-05, "loss": 0.2536, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.8585365853658538, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.7857142857142857, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.8264984227129337, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.8675496688741722, "eval_ORGANIZATION_recall": 0.7891566265060241, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.062455397099256516, "eval_overall_accuracy": 0.9780521262002744, "eval_overall_f1": 0.8858560794044665, "eval_overall_precision": 0.8814814814814815, "eval_overall_recall": 0.8902743142144638, "eval_runtime": 0.2715, "eval_samples_per_second": 626.229, "eval_steps_per_second": 11.051, "step": 96 }, { "epoch": 2.0, "grad_norm": 1.269325613975525, "learning_rate": 4.9e-05, "loss": 0.0547, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.8855721393034827, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8240740740740741, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9102167182662539, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9363057324840764, "eval_ORGANIZATION_recall": 0.8855421686746988, "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9716312056737588, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.05409156531095505, "eval_overall_accuracy": 0.9840877914951989, "eval_overall_f1": 0.9244114002478315, "eval_overall_precision": 0.9187192118226601, "eval_overall_recall": 0.9301745635910225, "eval_runtime": 0.2783, "eval_samples_per_second": 610.89, "eval_steps_per_second": 10.78, "step": 192 }, { "epoch": 3.0, "grad_norm": 1.5557774305343628, "learning_rate": 4.85e-05, "loss": 0.032, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9040247678018575, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9299363057324841, "eval_ORGANIZATION_recall": 0.8795180722891566, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.07121666520833969, "eval_overall_accuracy": 0.9849108367626886, "eval_overall_f1": 0.933832709113608, "eval_overall_precision": 0.935, "eval_overall_recall": 0.9326683291770573, "eval_runtime": 0.2764, "eval_samples_per_second": 615.126, "eval_steps_per_second": 10.855, "step": 288 }, { "epoch": 4.0, "grad_norm": 2.1740479469299316, "learning_rate": 4.8e-05, "loss": 0.0196, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.9239130434782609, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9340659340659341, "eval_LOCATION_recall": 0.9139784946236559, "eval_ORGANIZATION_f1": 0.9285714285714286, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9176470588235294, "eval_ORGANIZATION_recall": 0.9397590361445783, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.05732434615492821, "eval_overall_accuracy": 0.9879286694101509, "eval_overall_f1": 0.9464508094645082, "eval_overall_precision": 0.945273631840796, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2736, "eval_samples_per_second": 621.271, "eval_steps_per_second": 10.964, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.028744017705321312, "learning_rate": 4.75e-05, "loss": 0.0114, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.8958333333333334, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8686868686868687, "eval_LOCATION_recall": 0.9247311827956989, "eval_ORGANIZATION_f1": 0.9096385542168675, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9096385542168675, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9716312056737588, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.07006135582923889, "eval_overall_accuracy": 0.9862825788751715, "eval_overall_f1": 0.9268897149938042, "eval_overall_precision": 0.9211822660098522, "eval_overall_recall": 0.9326683291770573, "eval_runtime": 0.2724, "eval_samples_per_second": 624.057, "eval_steps_per_second": 11.013, "step": 480 }, { "epoch": 6.0, "grad_norm": 4.791868686676025, "learning_rate": 4.7e-05, "loss": 0.0098, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.8571428571428572, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.7909090909090909, "eval_LOCATION_recall": 0.9354838709677419, "eval_ORGANIZATION_f1": 0.8769716088328076, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9205298013245033, "eval_ORGANIZATION_recall": 0.8373493975903614, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.10465936362743378, "eval_overall_accuracy": 0.9794238683127572, "eval_overall_f1": 0.9066002490660026, "eval_overall_precision": 0.9054726368159204, "eval_overall_recall": 0.9077306733167082, "eval_runtime": 0.2868, "eval_samples_per_second": 592.777, "eval_steps_per_second": 10.461, "step": 576 }, { "epoch": 7.0, "grad_norm": 0.06045348197221756, "learning_rate": 4.6500000000000005e-05, "loss": 0.0096, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.8979591836734693, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8543689320388349, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9477124183006536, "eval_ORGANIZATION_recall": 0.8734939759036144, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.0744900330901146, "eval_overall_accuracy": 0.9873799725651577, "eval_overall_f1": 0.9286608260325407, "eval_overall_precision": 0.9321608040201005, "eval_overall_recall": 0.9251870324189526, "eval_runtime": 0.2725, "eval_samples_per_second": 623.926, "eval_steps_per_second": 11.01, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.21935367584228516, "learning_rate": 4.600000000000001e-05, "loss": 0.0088, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.883248730964467, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8365384615384616, "eval_LOCATION_recall": 0.9354838709677419, "eval_ORGANIZATION_f1": 0.9079754601226994, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.925, "eval_ORGANIZATION_recall": 0.891566265060241, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.0766613706946373, "eval_overall_accuracy": 0.9871056241426612, "eval_overall_f1": 0.9267080745341615, "eval_overall_precision": 0.9232673267326733, "eval_overall_recall": 0.9301745635910225, "eval_runtime": 0.2785, "eval_samples_per_second": 610.503, "eval_steps_per_second": 10.774, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.007653372827917337, "learning_rate": 4.55e-05, "loss": 0.007, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.8989898989898989, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8476190476190476, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9107692307692308, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9308176100628931, "eval_ORGANIZATION_recall": 0.891566265060241, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.08433980494737625, "eval_overall_accuracy": 0.9854595336076818, "eval_overall_f1": 0.9293680297397771, "eval_overall_precision": 0.9236453201970444, "eval_overall_recall": 0.9351620947630923, "eval_runtime": 0.2763, "eval_samples_per_second": 615.234, "eval_steps_per_second": 10.857, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.24116267263889313, "learning_rate": 4.5e-05, "loss": 0.0073, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.909090909090909, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.901840490797546, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.91875, "eval_ORGANIZATION_recall": 0.8855421686746988, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.08327263593673706, "eval_overall_accuracy": 0.9865569272976681, "eval_overall_f1": 0.9282178217821783, "eval_overall_precision": 0.9213759213759214, "eval_overall_recall": 0.9351620947630923, "eval_runtime": 0.2748, "eval_samples_per_second": 618.53, "eval_steps_per_second": 10.915, "step": 960 }, { "epoch": 11.0, "grad_norm": 1.1158887147903442, "learning_rate": 4.4500000000000004e-05, "loss": 0.0044, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.9090909090909091, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9042553191489362, "eval_LOCATION_recall": 0.9139784946236559, "eval_ORGANIZATION_f1": 0.9320987654320988, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9556962025316456, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9788732394366197, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.07285378128290176, "eval_overall_accuracy": 0.9876543209876543, "eval_overall_f1": 0.9433962264150944, "eval_overall_precision": 0.9517766497461929, "eval_overall_recall": 0.9351620947630923, "eval_runtime": 0.2749, "eval_samples_per_second": 618.312, "eval_steps_per_second": 10.911, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.032768115401268005, "learning_rate": 4.4000000000000006e-05, "loss": 0.0049, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.8969072164948454, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8613861386138614, "eval_LOCATION_recall": 0.9354838709677419, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.07886822521686554, "eval_overall_accuracy": 0.9879286694101509, "eval_overall_f1": 0.9378109452736318, "eval_overall_precision": 0.9354838709677419, "eval_overall_recall": 0.940149625935162, "eval_runtime": 0.2862, "eval_samples_per_second": 594.06, "eval_steps_per_second": 10.483, "step": 1152 }, { "epoch": 13.0, "grad_norm": 0.015501847490668297, "learning_rate": 4.35e-05, "loss": 0.0034, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.9214659685863874, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8979591836734694, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9300911854103343, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9386503067484663, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.07637546956539154, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9439601494396015, "eval_overall_precision": 0.9427860696517413, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2757, "eval_samples_per_second": 616.564, "eval_steps_per_second": 10.881, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.0688161626458168, "learning_rate": 4.3e-05, "loss": 0.0026, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.911917098445596, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.88, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9179331306990882, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9263803680981595, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.08463358134031296, "eval_overall_accuracy": 0.9860082304526749, "eval_overall_f1": 0.9364881693648817, "eval_overall_precision": 0.9353233830845771, "eval_overall_recall": 0.9376558603491272, "eval_runtime": 0.2751, "eval_samples_per_second": 617.9, "eval_steps_per_second": 10.904, "step": 1344 }, { "epoch": 15.0, "grad_norm": 0.5560211539268494, "learning_rate": 4.25e-05, "loss": 0.0047, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.9081632653061223, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8640776699029126, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9051987767584098, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9192546583850931, "eval_ORGANIZATION_recall": 0.891566265060241, "eval_PERSON_f1": 0.9580419580419581, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9513888888888888, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.08817324042320251, "eval_overall_accuracy": 0.9862825788751715, "eval_overall_f1": 0.9245982694684796, "eval_overall_precision": 0.9166666666666666, "eval_overall_recall": 0.9326683291770573, "eval_runtime": 0.2737, "eval_samples_per_second": 621.104, "eval_steps_per_second": 10.961, "step": 1440 }, { "epoch": 16.0, "grad_norm": 1.6391551494598389, "learning_rate": 4.2e-05, "loss": 0.0034, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.8994708994708994, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8854166666666666, "eval_LOCATION_recall": 0.9139784946236559, "eval_ORGANIZATION_f1": 0.9240121580547112, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9325153374233128, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.08551391959190369, "eval_overall_accuracy": 0.9879286694101509, "eval_overall_f1": 0.9363295880149813, "eval_overall_precision": 0.9375, "eval_overall_recall": 0.9351620947630923, "eval_runtime": 0.2802, "eval_samples_per_second": 606.633, "eval_steps_per_second": 10.705, "step": 1536 }, { "epoch": 17.0, "grad_norm": 0.004688511602580547, "learning_rate": 4.15e-05, "loss": 0.0036, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.9411764705882354, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9361702127659575, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9080118694362017, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.8947368421052632, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.08433548361063004, "eval_overall_accuracy": 0.9860082304526749, "eval_overall_f1": 0.9381188118811881, "eval_overall_precision": 0.9312039312039312, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2734, "eval_samples_per_second": 621.786, "eval_steps_per_second": 10.973, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.030324334278702736, "learning_rate": 4.1e-05, "loss": 0.0022, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.9128205128205128, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8725490196078431, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9107692307692308, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9308176100628931, "eval_ORGANIZATION_recall": 0.891566265060241, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.09843648225069046, "eval_overall_accuracy": 0.9862825788751715, "eval_overall_f1": 0.9351620947630923, "eval_overall_precision": 0.9351620947630923, "eval_overall_recall": 0.9351620947630923, "eval_runtime": 0.2768, "eval_samples_per_second": 614.248, "eval_steps_per_second": 10.84, "step": 1728 }, { "epoch": 19.0, "grad_norm": 0.16604948043823242, "learning_rate": 4.05e-05, "loss": 0.0038, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.8979591836734693, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8543689320388349, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9230769230769232, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9433962264150944, "eval_ORGANIZATION_recall": 0.9036144578313253, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.08929400891065598, "eval_overall_accuracy": 0.9873799725651577, "eval_overall_f1": 0.9378109452736318, "eval_overall_precision": 0.9354838709677419, "eval_overall_recall": 0.940149625935162, "eval_runtime": 0.2742, "eval_samples_per_second": 620.088, "eval_steps_per_second": 10.943, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.09192885458469391, "learning_rate": 4e-05, "loss": 0.0023, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.90625, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8787878787878788, "eval_LOCATION_recall": 0.9354838709677419, "eval_ORGANIZATION_f1": 0.9329268292682927, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9444444444444444, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.08311018347740173, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.9413233458177278, "eval_overall_precision": 0.9425, "eval_overall_recall": 0.940149625935162, "eval_runtime": 0.2836, "eval_samples_per_second": 599.473, "eval_steps_per_second": 10.579, "step": 1920 }, { "epoch": 21.0, "grad_norm": 0.0658658966422081, "learning_rate": 3.9500000000000005e-05, "loss": 0.0018, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.9361702127659575, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9263157894736842, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9294117647058823, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9080459770114943, "eval_ORGANIZATION_recall": 0.9518072289156626, "eval_PERSON_f1": 0.9614035087719298, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.958041958041958, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.08566827327013016, "eval_overall_accuracy": 0.9873799725651577, "eval_overall_f1": 0.942189421894219, "eval_overall_precision": 0.9296116504854369, "eval_overall_recall": 0.9551122194513716, "eval_runtime": 0.275, "eval_samples_per_second": 618.106, "eval_steps_per_second": 10.908, "step": 2016 }, { "epoch": 22.0, "grad_norm": 0.0013929170090705156, "learning_rate": 3.9000000000000006e-05, "loss": 0.0024, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.911917098445596, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.88, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.918429003021148, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9212121212121213, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.09154558926820755, "eval_overall_accuracy": 0.9879286694101509, "eval_overall_f1": 0.9366459627329192, "eval_overall_precision": 0.9331683168316832, "eval_overall_recall": 0.940149625935162, "eval_runtime": 0.274, "eval_samples_per_second": 620.334, "eval_steps_per_second": 10.947, "step": 2112 }, { "epoch": 23.0, "grad_norm": 0.0019474742002785206, "learning_rate": 3.85e-05, "loss": 0.0015, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.9128205128205128, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8725490196078431, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9207317073170733, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9320987654320988, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.968421052631579, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.965034965034965, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.08806966990232468, "eval_overall_accuracy": 0.9876543209876543, "eval_overall_f1": 0.9356435643564357, "eval_overall_precision": 0.9287469287469288, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.272, "eval_samples_per_second": 624.913, "eval_steps_per_second": 11.028, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.3652423620223999, "learning_rate": 3.8e-05, "loss": 0.0019, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9325153374233127, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.08749129623174667, "eval_overall_accuracy": 0.9893004115226337, "eval_overall_f1": 0.9463171036204744, "eval_overall_precision": 0.9475, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2759, "eval_samples_per_second": 616.122, "eval_steps_per_second": 10.873, "step": 2304 }, { "epoch": 25.0, "grad_norm": 0.2117108851671219, "learning_rate": 3.7500000000000003e-05, "loss": 0.001, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9287925696594428, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9554140127388535, "eval_ORGANIZATION_recall": 0.9036144578313253, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.09758701175451279, "eval_overall_accuracy": 0.9882030178326474, "eval_overall_f1": 0.9413233458177278, "eval_overall_precision": 0.9425, "eval_overall_recall": 0.940149625935162, "eval_runtime": 0.2724, "eval_samples_per_second": 624.038, "eval_steps_per_second": 11.012, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.006529322825372219, "learning_rate": 3.7e-05, "loss": 0.003, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.9025641025641027, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8627450980392157, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9146341463414634, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9259259259259259, "eval_ORGANIZATION_recall": 0.9036144578313253, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.08552427589893341, "eval_overall_accuracy": 0.9868312757201646, "eval_overall_f1": 0.9330024813895781, "eval_overall_precision": 0.928395061728395, "eval_overall_recall": 0.9376558603491272, "eval_runtime": 0.2723, "eval_samples_per_second": 624.395, "eval_steps_per_second": 11.019, "step": 2496 }, { "epoch": 27.0, "grad_norm": 0.0027512740343809128, "learning_rate": 3.65e-05, "loss": 0.0016, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.9081632653061223, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8640776699029126, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.91131498470948, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9254658385093167, "eval_ORGANIZATION_recall": 0.8975903614457831, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.09637561440467834, "eval_overall_accuracy": 0.9873799725651577, "eval_overall_f1": 0.9330024813895781, "eval_overall_precision": 0.928395061728395, "eval_overall_recall": 0.9376558603491272, "eval_runtime": 0.2734, "eval_samples_per_second": 621.853, "eval_steps_per_second": 10.974, "step": 2592 }, { "epoch": 28.0, "grad_norm": 0.08265668153762817, "learning_rate": 3.6e-05, "loss": 0.002, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.911917098445596, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.88, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9179331306990882, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9263803680981595, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.09860537201166153, "eval_overall_accuracy": 0.9873799725651577, "eval_overall_f1": 0.9378109452736318, "eval_overall_precision": 0.9354838709677419, "eval_overall_recall": 0.940149625935162, "eval_runtime": 0.2701, "eval_samples_per_second": 629.413, "eval_steps_per_second": 11.107, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.0008096226374618709, "learning_rate": 3.55e-05, "loss": 0.0022, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.9214659685863874, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8979591836734694, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.920245398773006, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9375, "eval_ORGANIZATION_recall": 0.9036144578313253, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.09792660176753998, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.94, "eval_overall_precision": 0.9423558897243107, "eval_overall_recall": 0.9376558603491272, "eval_runtime": 0.2829, "eval_samples_per_second": 600.929, "eval_steps_per_second": 10.605, "step": 2784 }, { "epoch": 30.0, "grad_norm": 4.448439121246338, "learning_rate": 3.5e-05, "loss": 0.0021, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.9319371727748691, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9081632653061225, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9272727272727274, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9329268292682927, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.0972193107008934, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9477611940298507, "eval_overall_precision": 0.9454094292803971, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2769, "eval_samples_per_second": 613.898, "eval_steps_per_second": 10.833, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.001308169448748231, "learning_rate": 3.45e-05, "loss": 0.0017, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.11491724103689194, "eval_overall_accuracy": 0.9876543209876543, "eval_overall_f1": 0.9440993788819876, "eval_overall_precision": 0.9405940594059405, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2785, "eval_samples_per_second": 610.473, "eval_steps_per_second": 10.773, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.0011797519400715828, "learning_rate": 3.4000000000000007e-05, "loss": 0.0016, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.911917098445596, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.88, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.09680671989917755, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.942643391521197, "eval_overall_precision": 0.942643391521197, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.283, "eval_samples_per_second": 600.797, "eval_steps_per_second": 10.602, "step": 3072 }, { "epoch": 33.0, "grad_norm": 0.01620068959891796, "learning_rate": 3.35e-05, "loss": 0.0011, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.9157894736842105, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8969072164948454, "eval_LOCATION_recall": 0.9354838709677419, "eval_ORGANIZATION_f1": 0.9300911854103343, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9386503067484663, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.08883913606405258, "eval_overall_accuracy": 0.9893004115226337, "eval_overall_f1": 0.942643391521197, "eval_overall_precision": 0.942643391521197, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.2727, "eval_samples_per_second": 623.369, "eval_steps_per_second": 11.001, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.16773520410060883, "learning_rate": 3.3e-05, "loss": 0.0018, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9329268292682927, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9444444444444444, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9754385964912281, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.972027972027972, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.08975508064031601, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.946716232961586, "eval_overall_precision": 0.9408866995073891, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2765, "eval_samples_per_second": 614.86, "eval_steps_per_second": 10.85, "step": 3264 }, { "epoch": 35.0, "grad_norm": 13.28307819366455, "learning_rate": 3.2500000000000004e-05, "loss": 0.0008, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.9285714285714286, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.883495145631068, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9263803680981595, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.94375, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.0988275557756424, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.946583850931677, "eval_overall_precision": 0.943069306930693, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2719, "eval_samples_per_second": 625.163, "eval_steps_per_second": 11.032, "step": 3360 }, { "epoch": 36.0, "grad_norm": 0.00786670297384262, "learning_rate": 3.2000000000000005e-05, "loss": 0.0025, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.8989898989898989, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8476190476190476, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9325153374233127, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9754385964912281, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.972027972027972, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.09047625958919525, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.9394313967861558, "eval_overall_precision": 0.9313725490196079, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2826, "eval_samples_per_second": 601.492, "eval_steps_per_second": 10.615, "step": 3456 }, { "epoch": 37.0, "grad_norm": 0.0032606797758489847, "learning_rate": 3.15e-05, "loss": 0.0023, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.9319371727748691, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9081632653061225, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9390243902439024, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9506172839506173, "eval_ORGANIZATION_recall": 0.927710843373494, "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9788732394366197, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.09261784702539444, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9514321295143213, "eval_overall_precision": 0.9502487562189055, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2735, "eval_samples_per_second": 621.499, "eval_steps_per_second": 10.968, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.0010140871163457632, "learning_rate": 3.1e-05, "loss": 0.0019, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.9312169312169313, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9166666666666666, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9285714285714286, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9176470588235294, "eval_ORGANIZATION_recall": 0.9397590361445783, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.10431136935949326, "eval_overall_accuracy": 0.9879286694101509, "eval_overall_f1": 0.9455445544554455, "eval_overall_precision": 0.9385749385749386, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2746, "eval_samples_per_second": 619.086, "eval_steps_per_second": 10.925, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.012870563194155693, "learning_rate": 3.05e-05, "loss": 0.0016, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.9374999999999999, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9090909090909091, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.972027972027972, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9652777777777778, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.10107599198818207, "eval_overall_accuracy": 0.9879286694101509, "eval_overall_f1": 0.946583850931677, "eval_overall_precision": 0.943069306930693, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2736, "eval_samples_per_second": 621.263, "eval_steps_per_second": 10.963, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.0019054836593568325, "learning_rate": 3e-05, "loss": 0.0017, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.9072164948453608, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8712871287128713, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.920245398773006, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9375, "eval_ORGANIZATION_recall": 0.9036144578313253, "eval_PERSON_f1": 0.968421052631579, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.965034965034965, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.10998252779245377, "eval_overall_accuracy": 0.9868312757201646, "eval_overall_f1": 0.9341614906832298, "eval_overall_precision": 0.9306930693069307, "eval_overall_recall": 0.9376558603491272, "eval_runtime": 0.274, "eval_samples_per_second": 620.383, "eval_steps_per_second": 10.948, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.0006370858754962683, "learning_rate": 2.95e-05, "loss": 0.0014, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9009009009009009, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.8982035928143712, "eval_ORGANIZATION_recall": 0.9036144578313253, "eval_PERSON_f1": 0.9716312056737589, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9785714285714285, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.12568451464176178, "eval_overall_accuracy": 0.9851851851851852, "eval_overall_f1": 0.9295426452410381, "eval_overall_precision": 0.9215686274509803, "eval_overall_recall": 0.9376558603491272, "eval_runtime": 0.2719, "eval_samples_per_second": 625.128, "eval_steps_per_second": 11.032, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.0029636453837156296, "learning_rate": 2.9e-05, "loss": 0.0021, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9226006191950464, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9490445859872612, "eval_ORGANIZATION_recall": 0.8975903614457831, "eval_PERSON_f1": 0.9754385964912281, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.972027972027972, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.10770393908023834, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.942643391521197, "eval_overall_precision": 0.942643391521197, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.2728, "eval_samples_per_second": 623.215, "eval_steps_per_second": 10.998, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.06515736877918243, "learning_rate": 2.8499999999999998e-05, "loss": 0.0026, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.9128205128205128, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8725490196078431, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9057750759878419, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9141104294478528, "eval_ORGANIZATION_recall": 0.8975903614457831, "eval_PERSON_f1": 0.968421052631579, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.965034965034965, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.1268494725227356, "eval_overall_accuracy": 0.9838134430727024, "eval_overall_f1": 0.9295426452410381, "eval_overall_precision": 0.9215686274509803, "eval_overall_recall": 0.9376558603491272, "eval_runtime": 0.2743, "eval_samples_per_second": 619.661, "eval_steps_per_second": 10.935, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.0020745082292705774, "learning_rate": 2.8000000000000003e-05, "loss": 0.0016, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9345794392523364, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.967741935483871, "eval_ORGANIZATION_recall": 0.9036144578313253, "eval_PERSON_f1": 0.9647887323943662, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9647887323943662, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.11049126833677292, "eval_overall_accuracy": 0.9865569272976681, "eval_overall_f1": 0.9424999999999999, "eval_overall_precision": 0.9448621553884712, "eval_overall_recall": 0.940149625935162, "eval_runtime": 0.2748, "eval_samples_per_second": 618.533, "eval_steps_per_second": 10.915, "step": 4224 }, { "epoch": 45.0, "grad_norm": 0.0009372663334943354, "learning_rate": 2.7500000000000004e-05, "loss": 0.0013, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.12877264618873596, "eval_overall_accuracy": 0.9849108367626886, "eval_overall_f1": 0.9369592088998765, "eval_overall_precision": 0.928921568627451, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2726, "eval_samples_per_second": 623.712, "eval_steps_per_second": 11.007, "step": 4320 }, { "epoch": 46.0, "grad_norm": 0.01688425801694393, "learning_rate": 2.7000000000000002e-05, "loss": 0.0014, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9014925373134328, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.893491124260355, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.951048951048951, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9444444444444444, "eval_PERSON_recall": 0.9577464788732394, "eval_loss": 0.147969052195549, "eval_overall_accuracy": 0.9821673525377229, "eval_overall_f1": 0.9225092250922511, "eval_overall_precision": 0.9101941747572816, "eval_overall_recall": 0.9351620947630923, "eval_runtime": 0.2763, "eval_samples_per_second": 615.276, "eval_steps_per_second": 10.858, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.0011194701073691249, "learning_rate": 2.6500000000000004e-05, "loss": 0.0013, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9325153374233127, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.10752855241298676, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9476309226932669, "eval_overall_precision": 0.9476309226932669, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2732, "eval_samples_per_second": 622.216, "eval_steps_per_second": 10.98, "step": 4512 }, { "epoch": 48.0, "grad_norm": 0.0004208228492643684, "learning_rate": 2.6000000000000002e-05, "loss": 0.0008, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9141104294478528, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.93125, "eval_ORGANIZATION_recall": 0.8975903614457831, "eval_PERSON_f1": 0.9716312056737589, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9785714285714285, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.11435800790786743, "eval_overall_accuracy": 0.9868312757201646, "eval_overall_f1": 0.9363295880149813, "eval_overall_precision": 0.9375, "eval_overall_recall": 0.9351620947630923, "eval_runtime": 0.2741, "eval_samples_per_second": 620.241, "eval_steps_per_second": 10.945, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.0018427808536216617, "learning_rate": 2.5500000000000003e-05, "loss": 0.0016, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9320987654320988, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9556962025316456, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.1204075962305069, "eval_overall_accuracy": 0.9871056241426612, "eval_overall_f1": 0.9451371571072319, "eval_overall_precision": 0.9451371571072319, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2763, "eval_samples_per_second": 615.3, "eval_steps_per_second": 10.858, "step": 4704 }, { "epoch": 50.0, "grad_norm": 0.006291194353252649, "learning_rate": 2.5e-05, "loss": 0.0018, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9235474006116209, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.937888198757764, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.11503820866346359, "eval_overall_accuracy": 0.9873799725651577, "eval_overall_f1": 0.9427860696517413, "eval_overall_precision": 0.9404466501240695, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2727, "eval_samples_per_second": 623.285, "eval_steps_per_second": 10.999, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.0002592083183117211, "learning_rate": 2.45e-05, "loss": 0.0008, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9386503067484663, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95625, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.11824705451726913, "eval_overall_accuracy": 0.9882030178326474, "eval_overall_f1": 0.9464508094645082, "eval_overall_precision": 0.945273631840796, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2727, "eval_samples_per_second": 623.382, "eval_steps_per_second": 11.001, "step": 4896 }, { "epoch": 52.0, "grad_norm": 1.07253098487854, "learning_rate": 2.4e-05, "loss": 0.0009, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9329268292682927, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9444444444444444, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.11802083253860474, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.9477611940298507, "eval_overall_precision": 0.9454094292803971, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2822, "eval_samples_per_second": 602.338, "eval_steps_per_second": 10.629, "step": 4992 }, { "epoch": 53.0, "grad_norm": 0.0011605395702645183, "learning_rate": 2.35e-05, "loss": 0.0013, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.9319371727748691, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9081632653061225, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9300911854103343, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9386503067484663, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.11204486340284348, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.9464508094645082, "eval_overall_precision": 0.945273631840796, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2715, "eval_samples_per_second": 626.086, "eval_steps_per_second": 11.049, "step": 5088 }, { "epoch": 54.0, "grad_norm": 0.00036611565155908465, "learning_rate": 2.3000000000000003e-05, "loss": 0.0004, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9189189189189191, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9161676646706587, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.13027532398700714, "eval_overall_accuracy": 0.9854595336076818, "eval_overall_f1": 0.9371146732429101, "eval_overall_precision": 0.926829268292683, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2731, "eval_samples_per_second": 622.464, "eval_steps_per_second": 10.985, "step": 5184 }, { "epoch": 55.0, "grad_norm": 0.0003306926810182631, "learning_rate": 2.25e-05, "loss": 0.0005, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9430051813471503, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9329268292682927, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9444444444444444, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.972027972027972, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9652777777777778, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.12078429013490677, "eval_overall_accuracy": 0.9868312757201646, "eval_overall_f1": 0.949194547707559, "eval_overall_precision": 0.9433497536945813, "eval_overall_recall": 0.9551122194513716, "eval_runtime": 0.2723, "eval_samples_per_second": 624.373, "eval_steps_per_second": 11.018, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.003182685002684593, "learning_rate": 2.2000000000000003e-05, "loss": 0.0006, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9326424870466321, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9418960244648319, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9565217391304348, "eval_ORGANIZATION_recall": 0.927710843373494, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.12055596709251404, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2766, "eval_samples_per_second": 614.686, "eval_steps_per_second": 10.847, "step": 5376 }, { "epoch": 57.0, "grad_norm": 0.0003461630258243531, "learning_rate": 2.15e-05, "loss": 0.0009, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9386503067484663, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95625, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.968421052631579, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.965034965034965, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.1301981806755066, "eval_overall_accuracy": 0.9879286694101509, "eval_overall_f1": 0.9454094292803972, "eval_overall_precision": 0.9407407407407408, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2755, "eval_samples_per_second": 617.052, "eval_steps_per_second": 10.889, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.0019114755559712648, "learning_rate": 2.1e-05, "loss": 0.0005, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9386503067484663, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95625, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.11792100220918655, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9477611940298507, "eval_overall_precision": 0.9454094292803971, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2758, "eval_samples_per_second": 616.288, "eval_steps_per_second": 10.876, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.00043655483750626445, "learning_rate": 2.05e-05, "loss": 0.0007, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9353846153846154, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9559748427672956, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9645390070921985, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9714285714285714, "eval_PERSON_recall": 0.9577464788732394, "eval_loss": 0.12990164756774902, "eval_overall_accuracy": 0.9879286694101509, "eval_overall_f1": 0.9413233458177278, "eval_overall_precision": 0.9425, "eval_overall_recall": 0.940149625935162, "eval_runtime": 0.2723, "eval_samples_per_second": 624.28, "eval_steps_per_second": 11.017, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.06836064904928207, "learning_rate": 2e-05, "loss": 0.0023, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.9333333333333333, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8921568627450981, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9411764705882353, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9681528662420382, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9686411149825783, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9586206896551724, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.10230163484811783, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9490683229813663, "eval_overall_precision": 0.9455445544554455, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2785, "eval_samples_per_second": 610.349, "eval_steps_per_second": 10.771, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.0014599261339753866, "learning_rate": 1.9500000000000003e-05, "loss": 0.0003, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9440993788819876, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9743589743589743, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.10883278399705887, "eval_overall_accuracy": 0.9901234567901235, "eval_overall_f1": 0.9548872180451127, "eval_overall_precision": 0.9596977329974811, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2722, "eval_samples_per_second": 624.632, "eval_steps_per_second": 11.023, "step": 5856 }, { "epoch": 62.0, "grad_norm": 0.0015552460681647062, "learning_rate": 1.9e-05, "loss": 0.0002, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9326424870466321, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9353846153846154, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9559748427672956, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.11479648947715759, "eval_overall_accuracy": 0.9895747599451303, "eval_overall_f1": 0.951310861423221, "eval_overall_precision": 0.9525, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2706, "eval_samples_per_second": 628.179, "eval_steps_per_second": 11.086, "step": 5952 }, { "epoch": 63.0, "grad_norm": 0.0004029857518617064, "learning_rate": 1.85e-05, "loss": 0.0005, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9329268292682927, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9444444444444444, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.12344972789287567, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9515527950310558, "eval_overall_precision": 0.948019801980198, "eval_overall_recall": 0.9551122194513716, "eval_runtime": 0.2763, "eval_samples_per_second": 615.164, "eval_steps_per_second": 10.856, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.0006202245713211596, "learning_rate": 1.8e-05, "loss": 0.0004, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9411764705882353, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9681528662420382, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.11112768203020096, "eval_overall_accuracy": 0.9893004115226337, "eval_overall_f1": 0.9473684210526315, "eval_overall_precision": 0.9521410579345088, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.2851, "eval_samples_per_second": 596.233, "eval_steps_per_second": 10.522, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.011487938463687897, "learning_rate": 1.75e-05, "loss": 0.0004, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9326424870466321, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9361702127659575, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9447852760736196, "eval_ORGANIZATION_recall": 0.927710843373494, "eval_PERSON_f1": 0.9716312056737589, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9785714285714285, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.11858505010604858, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.9477611940298507, "eval_overall_precision": 0.9454094292803971, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2715, "eval_samples_per_second": 626.091, "eval_steps_per_second": 11.049, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.0013585726264864206, "learning_rate": 1.7000000000000003e-05, "loss": 0.0003, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9326424870466321, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9415384615384617, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9622641509433962, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.11873988062143326, "eval_overall_accuracy": 0.9895747599451303, "eval_overall_f1": 0.951188986232791, "eval_overall_precision": 0.9547738693467337, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2717, "eval_samples_per_second": 625.753, "eval_steps_per_second": 11.043, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.02065558359026909, "learning_rate": 1.65e-05, "loss": 0.0003, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.898989898989899, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.12448842823505402, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9450000000000001, "eval_overall_precision": 0.9473684210526315, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.2857, "eval_samples_per_second": 594.984, "eval_steps_per_second": 10.5, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.0004572625330183655, "learning_rate": 1.6000000000000003e-05, "loss": 0.0006, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.898989898989899, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9325153374233127, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.12086562812328339, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9461827284105131, "eval_overall_precision": 0.949748743718593, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.2716, "eval_samples_per_second": 625.944, "eval_steps_per_second": 11.046, "step": 6528 }, { "epoch": 69.0, "grad_norm": 0.0009455174440518022, "learning_rate": 1.55e-05, "loss": 0.0003, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.898989898989899, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9716312056737589, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9785714285714285, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.11985588073730469, "eval_overall_accuracy": 0.9882030178326474, "eval_overall_f1": 0.9438202247191011, "eval_overall_precision": 0.945, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.2865, "eval_samples_per_second": 593.269, "eval_steps_per_second": 10.469, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.00020860238873865455, "learning_rate": 1.5e-05, "loss": 0.0003, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9353846153846154, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9559748427672956, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.11953891068696976, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9450000000000001, "eval_overall_precision": 0.9473684210526315, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.2731, "eval_samples_per_second": 622.549, "eval_steps_per_second": 10.986, "step": 6720 }, { "epoch": 71.0, "grad_norm": 0.15998342633247375, "learning_rate": 1.45e-05, "loss": 0.0006, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9382716049382716, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9620253164556962, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.1208614706993103, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9461827284105131, "eval_overall_precision": 0.949748743718593, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.3529, "eval_samples_per_second": 481.662, "eval_steps_per_second": 8.5, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.00031563107040710747, "learning_rate": 1.4000000000000001e-05, "loss": 0.0003, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9292307692307692, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.949685534591195, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.12843403220176697, "eval_overall_accuracy": 0.9882030178326474, "eval_overall_f1": 0.9450000000000001, "eval_overall_precision": 0.9473684210526315, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.2763, "eval_samples_per_second": 615.169, "eval_steps_per_second": 10.856, "step": 6912 }, { "epoch": 73.0, "grad_norm": 0.0016298560658469796, "learning_rate": 1.3500000000000001e-05, "loss": 0.0007, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9329268292682927, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9444444444444444, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.1270994246006012, "eval_overall_accuracy": 0.9879286694101509, "eval_overall_f1": 0.945273631840796, "eval_overall_precision": 0.9429280397022333, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2819, "eval_samples_per_second": 603.122, "eval_steps_per_second": 10.643, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.0005925354198552668, "learning_rate": 1.3000000000000001e-05, "loss": 0.0006, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9353846153846154, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9559748427672956, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.13110530376434326, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9461827284105131, "eval_overall_precision": 0.949748743718593, "eval_overall_recall": 0.942643391521197, "eval_runtime": 0.2779, "eval_samples_per_second": 611.731, "eval_steps_per_second": 10.795, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.0012239688076078892, "learning_rate": 1.25e-05, "loss": 0.0012, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9382716049382716, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9620253164556962, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.12370780855417252, "eval_overall_accuracy": 0.9893004115226337, "eval_overall_f1": 0.9486858573216519, "eval_overall_precision": 0.9522613065326633, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2874, "eval_samples_per_second": 591.468, "eval_steps_per_second": 10.438, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.00020759411563631147, "learning_rate": 1.2e-05, "loss": 0.001, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9345794392523364, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.967741935483871, "eval_ORGANIZATION_recall": 0.9036144578313253, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.12144968658685684, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.946047678795483, "eval_overall_precision": 0.952020202020202, "eval_overall_recall": 0.940149625935162, "eval_runtime": 0.2755, "eval_samples_per_second": 617.131, "eval_steps_per_second": 10.891, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.0014589158818125725, "learning_rate": 1.1500000000000002e-05, "loss": 0.0003, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9329268292682927, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9444444444444444, "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.11771202087402344, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.9477611940298507, "eval_overall_precision": 0.9454094292803971, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2777, "eval_samples_per_second": 612.084, "eval_steps_per_second": 10.801, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.00015712979075033218, "learning_rate": 1.1000000000000001e-05, "loss": 0.0005, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9390243902439024, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9506172839506173, "eval_ORGANIZATION_recall": 0.927710843373494, "eval_PERSON_f1": 0.9750889679715302, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9856115107913669, "eval_PERSON_recall": 0.9647887323943662, "eval_loss": 0.12339749932289124, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.9489414694894147, "eval_overall_precision": 0.9477611940298507, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2743, "eval_samples_per_second": 619.863, "eval_steps_per_second": 10.939, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.0004729692591354251, "learning_rate": 1.05e-05, "loss": 0.0018, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.898989898989899, "eval_LOCATION_recall": 0.956989247311828, "eval_ORGANIZATION_f1": 0.9393939393939394, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9451219512195121, "eval_ORGANIZATION_recall": 0.9337349397590361, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.11351267993450165, "eval_overall_accuracy": 0.9893004115226337, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2762, "eval_samples_per_second": 615.482, "eval_steps_per_second": 10.861, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.00032218199339695275, "learning_rate": 1e-05, "loss": 0.0004, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.9278350515463919, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9325153374233127, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.12128762900829315, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9476309226932669, "eval_overall_precision": 0.9476309226932669, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2769, "eval_samples_per_second": 613.887, "eval_steps_per_second": 10.833, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.029026150703430176, "learning_rate": 9.5e-06, "loss": 0.0002, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9320987654320988, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9556962025316456, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.12814442813396454, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9463171036204744, "eval_overall_precision": 0.9475, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2781, "eval_samples_per_second": 611.202, "eval_steps_per_second": 10.786, "step": 7776 }, { "epoch": 82.0, "grad_norm": 0.000658427132293582, "learning_rate": 9e-06, "loss": 0.0007, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9374999999999999, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9090909090909091, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9390243902439024, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9506172839506173, "eval_ORGANIZATION_recall": 0.927710843373494, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.11656615138053894, "eval_overall_accuracy": 0.9895747599451303, "eval_overall_f1": 0.9526184538653366, "eval_overall_precision": 0.9526184538653366, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2753, "eval_samples_per_second": 617.513, "eval_steps_per_second": 10.897, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.0005032567423768342, "learning_rate": 8.500000000000002e-06, "loss": 0.0004, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9137055837563453, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8653846153846154, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9235474006116209, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.937888198757764, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.13869613409042358, "eval_overall_accuracy": 0.9873799725651577, "eval_overall_f1": 0.9404466501240694, "eval_overall_precision": 0.9358024691358025, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2738, "eval_samples_per_second": 620.855, "eval_steps_per_second": 10.956, "step": 7968 }, { "epoch": 84.0, "grad_norm": 0.00019312210497446358, "learning_rate": 8.000000000000001e-06, "loss": 0.0002, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.9183673469387754, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8737864077669902, "eval_LOCATION_recall": 0.967741935483871, "eval_ORGANIZATION_f1": 0.9292307692307692, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.949685534591195, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.1367323249578476, "eval_overall_accuracy": 0.988477366255144, "eval_overall_f1": 0.9439601494396015, "eval_overall_precision": 0.9427860696517413, "eval_overall_recall": 0.9451371571072319, "eval_runtime": 0.2741, "eval_samples_per_second": 620.208, "eval_steps_per_second": 10.945, "step": 8064 }, { "epoch": 85.0, "grad_norm": 0.001622176030650735, "learning_rate": 7.5e-06, "loss": 0.0007, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9333333333333333, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8921568627450981, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9292307692307692, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.949685534591195, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.1286858767271042, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9476309226932669, "eval_overall_precision": 0.9476309226932669, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2841, "eval_samples_per_second": 598.45, "eval_steps_per_second": 10.561, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.00034102791687473655, "learning_rate": 7.000000000000001e-06, "loss": 0.0008, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9333333333333333, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8921568627450981, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9263803680981595, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.94375, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.12806376814842224, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9477611940298507, "eval_overall_precision": 0.9454094292803971, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2835, "eval_samples_per_second": 599.572, "eval_steps_per_second": 10.581, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.0075645060278475285, "learning_rate": 6.5000000000000004e-06, "loss": 0.0002, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9333333333333333, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8921568627450981, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9292307692307692, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.949685534591195, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.12655898928642273, "eval_overall_accuracy": 0.9887517146776406, "eval_overall_f1": 0.9476309226932669, "eval_overall_precision": 0.9476309226932669, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2728, "eval_samples_per_second": 623.199, "eval_steps_per_second": 10.998, "step": 8352 }, { "epoch": 88.0, "grad_norm": 6.502119541168213, "learning_rate": 6e-06, "loss": 0.0005, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9333333333333333, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.8921568627450981, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9320987654320988, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9556962025316456, "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.12733419239521027, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9488139825218477, "eval_overall_precision": 0.95, "eval_overall_recall": 0.9476309226932669, "eval_runtime": 0.2718, "eval_samples_per_second": 625.547, "eval_steps_per_second": 11.039, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.00019170860468875617, "learning_rate": 5.500000000000001e-06, "loss": 0.0003, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9353846153846154, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9559748427672956, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.12625864148139954, "eval_overall_accuracy": 0.9893004115226337, "eval_overall_f1": 0.951310861423221, "eval_overall_precision": 0.9525, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2872, "eval_samples_per_second": 591.84, "eval_steps_per_second": 10.444, "step": 8544 }, { "epoch": 90.0, "grad_norm": 0.0001946709380717948, "learning_rate": 5e-06, "loss": 0.0001, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9325153374233127, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.1265157163143158, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9501246882793017, "eval_overall_precision": 0.9501246882793017, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2894, "eval_samples_per_second": 587.341, "eval_steps_per_second": 10.365, "step": 8640 }, { "epoch": 91.0, "grad_norm": 0.00022210170573089272, "learning_rate": 4.5e-06, "loss": 0.0002, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9325153374233127, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.95, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, "eval_loss": 0.12690623104572296, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9501246882793017, "eval_overall_precision": 0.9501246882793017, "eval_overall_recall": 0.9501246882793017, "eval_runtime": 0.2731, "eval_samples_per_second": 622.455, "eval_steps_per_second": 10.985, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.00021052845113445073, "learning_rate": 4.000000000000001e-06, "loss": 0.0001, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.12825678288936615, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2808, "eval_samples_per_second": 605.4, "eval_steps_per_second": 10.684, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.00014110341726336628, "learning_rate": 3.5000000000000004e-06, "loss": 0.0002, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.1284293830394745, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.275, "eval_samples_per_second": 618.112, "eval_steps_per_second": 10.908, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.00015382652054540813, "learning_rate": 3e-06, "loss": 0.0002, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.12859950959682465, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2729, "eval_samples_per_second": 622.959, "eval_steps_per_second": 10.993, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.00017179538554046303, "learning_rate": 2.5e-06, "loss": 0.0002, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.1287505179643631, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.274, "eval_samples_per_second": 620.437, "eval_steps_per_second": 10.949, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.00012161211634520441, "learning_rate": 2.0000000000000003e-06, "loss": 0.0002, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.12845784425735474, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2731, "eval_samples_per_second": 622.489, "eval_steps_per_second": 10.985, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.0003890093066729605, "learning_rate": 1.5e-06, "loss": 0.0002, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.1285652369260788, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2776, "eval_samples_per_second": 612.459, "eval_steps_per_second": 10.808, "step": 9312 }, { "epoch": 98.0, "grad_norm": 0.00015593560237903148, "learning_rate": 1.0000000000000002e-06, "loss": 0.0005, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.12773250043392181, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2768, "eval_samples_per_second": 614.21, "eval_steps_per_second": 10.839, "step": 9408 }, { "epoch": 99.0, "grad_norm": 9.94185174931772e-05, "learning_rate": 5.000000000000001e-07, "loss": 0.0004, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.12759125232696533, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2724, "eval_samples_per_second": 624.185, "eval_steps_per_second": 11.015, "step": 9504 }, { "epoch": 100.0, "grad_norm": 0.00015960106975398958, "learning_rate": 0.0, "loss": 0.0003, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9381443298969072, "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.978494623655914, "eval_ORGANIZATION_f1": 0.9296636085626911, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9440993788819876, "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, "eval_loss": 0.12765251100063324, "eval_overall_accuracy": 0.9890260631001372, "eval_overall_f1": 0.9502487562189055, "eval_overall_precision": 0.9478908188585607, "eval_overall_recall": 0.9526184538653366, "eval_runtime": 0.2729, "eval_samples_per_second": 623.019, "eval_steps_per_second": 10.994, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3771733492877262.0, "train_loss": 0.005277395940502174, "train_runtime": 859.6096, "train_samples_per_second": 178.104, "train_steps_per_second": 11.168 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3771733492877262.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }