{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.8896476626396179, "learning_rate": 4.9500000000000004e-05, "loss": 0.847, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.02, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.16666666666666666, "eval_LOCATION_recall": 0.010638297872340425, "eval_ORGANIZATION_f1": 0.2524752475247525, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.21518987341772153, "eval_ORGANIZATION_recall": 0.30538922155688625, "eval_PERSON_f1": 0.2832861189801699, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.23148148148148148, "eval_PERSON_recall": 0.36496350364963503, "eval_loss": 0.40576934814453125, "eval_overall_accuracy": 0.8621546961325967, "eval_overall_f1": 0.23803967327887982, "eval_overall_precision": 0.2222222222222222, "eval_overall_recall": 0.2562814070351759, "eval_runtime": 0.2851, "eval_samples_per_second": 596.345, "eval_steps_per_second": 10.524, "step": 96 }, { "epoch": 2.0, "grad_norm": 0.794258713722229, "learning_rate": 4.9e-05, "loss": 0.3603, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.42424242424242425, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.40384615384615385, "eval_LOCATION_recall": 0.44680851063829785, "eval_ORGANIZATION_f1": 0.6084656084656085, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.5450236966824644, "eval_ORGANIZATION_recall": 0.688622754491018, "eval_PERSON_f1": 0.8610169491525423, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.8037974683544303, "eval_PERSON_recall": 0.927007299270073, "eval_loss": 0.21202494204044342, "eval_overall_accuracy": 0.9397790055248619, "eval_overall_f1": 0.6521239954075776, "eval_overall_precision": 0.6004228329809725, "eval_overall_recall": 0.7135678391959799, "eval_runtime": 0.2838, "eval_samples_per_second": 598.917, "eval_steps_per_second": 10.569, "step": 192 }, { "epoch": 3.0, "grad_norm": 1.086693525314331, "learning_rate": 4.85e-05, "loss": 0.1884, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.806282722513089, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.7938144329896907, "eval_LOCATION_recall": 0.8191489361702128, "eval_ORGANIZATION_f1": 0.8034682080924855, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.776536312849162, "eval_ORGANIZATION_recall": 0.8323353293413174, "eval_PERSON_f1": 0.9675090252707581, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9571428571428572, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.0927986279129982, "eval_overall_accuracy": 0.9707182320441989, "eval_overall_f1": 0.85995085995086, "eval_overall_precision": 0.8413461538461539, "eval_overall_recall": 0.8793969849246231, "eval_runtime": 0.2833, "eval_samples_per_second": 600.16, "eval_steps_per_second": 10.591, "step": 288 }, { "epoch": 4.0, "grad_norm": 0.6702704429626465, "learning_rate": 4.8e-05, "loss": 0.1293, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.8541666666666667, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8367346938775511, "eval_LOCATION_recall": 0.8723404255319149, "eval_ORGANIZATION_f1": 0.8621700879765396, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8448275862068966, "eval_ORGANIZATION_recall": 0.8802395209580839, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06611457467079163, "eval_overall_accuracy": 0.9795580110497237, "eval_overall_f1": 0.9007444168734491, "eval_overall_precision": 0.8897058823529411, "eval_overall_recall": 0.9120603015075377, "eval_runtime": 0.2829, "eval_samples_per_second": 601.01, "eval_steps_per_second": 10.606, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.8596266508102417, "learning_rate": 4.75e-05, "loss": 0.1088, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.8795811518324608, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.865979381443299, "eval_LOCATION_recall": 0.8936170212765957, "eval_ORGANIZATION_f1": 0.8639053254437871, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8538011695906432, "eval_ORGANIZATION_recall": 0.874251497005988, "eval_PERSON_f1": 0.989010989010989, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9926470588235294, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.05564097315073013, "eval_overall_accuracy": 0.9820441988950276, "eval_overall_f1": 0.9102244389027431, "eval_overall_precision": 0.9034653465346535, "eval_overall_recall": 0.9170854271356784, "eval_runtime": 0.2834, "eval_samples_per_second": 599.781, "eval_steps_per_second": 10.584, "step": 480 }, { "epoch": 6.0, "grad_norm": 0.638867974281311, "learning_rate": 4.7e-05, "loss": 0.0895, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.8695652173913044, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.7964601769911505, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.8875739644970414, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8771929824561403, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9926470588235294, "eval_PERSON_number": 137, "eval_PERSON_precision": 1.0, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.053672779351472855, "eval_overall_accuracy": 0.9831491712707182, "eval_overall_f1": 0.9179926560587515, "eval_overall_precision": 0.8949880668257757, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.283, "eval_samples_per_second": 600.68, "eval_steps_per_second": 10.6, "step": 576 }, { "epoch": 7.0, "grad_norm": 0.6115458607673645, "learning_rate": 4.6500000000000005e-05, "loss": 0.0849, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.8557213930348259, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8037383177570093, "eval_LOCATION_recall": 0.9148936170212766, "eval_ORGANIZATION_f1": 0.8961424332344213, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.888235294117647, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.981549815498155, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9925373134328358, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.054772019386291504, "eval_overall_accuracy": 0.9812154696132597, "eval_overall_f1": 0.9147095179233623, "eval_overall_precision": 0.9002433090024331, "eval_overall_recall": 0.9296482412060302, "eval_runtime": 0.2838, "eval_samples_per_second": 599.05, "eval_steps_per_second": 10.571, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.41238775849342346, "learning_rate": 4.600000000000001e-05, "loss": 0.0802, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.8768472906403939, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8165137614678899, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.8562874251497006, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8562874251497006, "eval_ORGANIZATION_recall": 0.8562874251497006, "eval_PERSON_f1": 0.9926470588235294, "eval_PERSON_number": 137, "eval_PERSON_precision": 1.0, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.04992206394672394, "eval_overall_accuracy": 0.9828729281767956, "eval_overall_f1": 0.9072929542645242, "eval_overall_precision": 0.8929440389294404, "eval_overall_recall": 0.9221105527638191, "eval_runtime": 0.2846, "eval_samples_per_second": 597.415, "eval_steps_per_second": 10.543, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.34571850299835205, "learning_rate": 4.55e-05, "loss": 0.0711, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.89, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.839622641509434, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.8982035928143712, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8982035928143712, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9926470588235294, "eval_PERSON_number": 137, "eval_PERSON_precision": 1.0, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.046244241297245026, "eval_overall_accuracy": 0.9848066298342542, "eval_overall_f1": 0.9280397022332505, "eval_overall_precision": 0.9166666666666666, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.283, "eval_samples_per_second": 600.617, "eval_steps_per_second": 10.599, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.8111412525177002, "learning_rate": 4.5e-05, "loss": 0.0656, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.8823529411764706, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8181818181818182, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.8882352941176471, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8728323699421965, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05290156975388527, "eval_overall_accuracy": 0.9825966850828729, "eval_overall_f1": 0.9168704156479217, "eval_overall_precision": 0.8928571428571429, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2854, "eval_samples_per_second": 595.584, "eval_steps_per_second": 10.51, "step": 960 }, { "epoch": 11.0, "grad_norm": 0.6214551329612732, "learning_rate": 4.4500000000000004e-05, "loss": 0.0637, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.8866995073891627, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8256880733944955, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.8848484848484848, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8957055214723927, "eval_ORGANIZATION_recall": 0.874251497005988, "eval_PERSON_f1": 0.9852941176470589, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9925925925925926, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.04657911881804466, "eval_overall_accuracy": 0.9842541436464088, "eval_overall_f1": 0.9192546583850932, "eval_overall_precision": 0.9090909090909091, "eval_overall_recall": 0.9296482412060302, "eval_runtime": 0.2845, "eval_samples_per_second": 597.458, "eval_steps_per_second": 10.543, "step": 1056 }, { "epoch": 12.0, "grad_norm": 1.341524600982666, "learning_rate": 4.4000000000000006e-05, "loss": 0.0544, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.8811881188118812, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8240740740740741, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9024390243902439, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9192546583850931, "eval_ORGANIZATION_recall": 0.8862275449101796, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.04851439967751503, "eval_overall_accuracy": 0.9845303867403314, "eval_overall_f1": 0.9240348692403487, "eval_overall_precision": 0.9160493827160494, "eval_overall_recall": 0.9321608040201005, "eval_runtime": 0.2859, "eval_samples_per_second": 594.706, "eval_steps_per_second": 10.495, "step": 1152 }, { "epoch": 13.0, "grad_norm": 0.6547470092773438, "learning_rate": 4.35e-05, "loss": 0.0521, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.8944723618090452, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8476190476190476, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9212121212121211, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9325153374233128, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9852941176470589, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9925925925925926, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.043917302042245865, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9363295880149813, "eval_overall_precision": 0.9305210918114144, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2853, "eval_samples_per_second": 595.909, "eval_steps_per_second": 10.516, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.8619551062583923, "learning_rate": 4.3e-05, "loss": 0.0535, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.8877551020408163, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8529411764705882, "eval_LOCATION_recall": 0.925531914893617, "eval_ORGANIZATION_f1": 0.9058823529411765, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8901734104046243, "eval_ORGANIZATION_recall": 0.9221556886227545, "eval_PERSON_f1": 0.989010989010989, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9926470588235294, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.04290783777832985, "eval_overall_accuracy": 0.9856353591160221, "eval_overall_f1": 0.9295426452410384, "eval_overall_precision": 0.9148418491484185, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2843, "eval_samples_per_second": 597.979, "eval_steps_per_second": 10.553, "step": 1344 }, { "epoch": 15.0, "grad_norm": 0.5241861939430237, "learning_rate": 4.25e-05, "loss": 0.0502, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.9119170984455959, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9361702127659575, "eval_ORGANIZATION_f1": 0.9151515151515152, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9263803680981595, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.044674795120954514, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9371859296482412, "eval_overall_precision": 0.9371859296482412, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2861, "eval_samples_per_second": 594.159, "eval_steps_per_second": 10.485, "step": 1440 }, { "epoch": 16.0, "grad_norm": 1.6386876106262207, "learning_rate": 4.2e-05, "loss": 0.0473, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.9166666666666666, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8979591836734694, "eval_LOCATION_recall": 0.9361702127659575, "eval_ORGANIZATION_f1": 0.9129129129129129, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9926470588235294, "eval_PERSON_number": 137, "eval_PERSON_precision": 1.0, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.03941977769136429, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9410288582183186, "eval_overall_precision": 0.9398496240601504, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2855, "eval_samples_per_second": 595.472, "eval_steps_per_second": 10.508, "step": 1536 }, { "epoch": 17.0, "grad_norm": 1.2366855144500732, "learning_rate": 4.15e-05, "loss": 0.0459, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.8955223880597015, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8411214953271028, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9123867069486405, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9207317073170732, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9926470588235294, "eval_PERSON_number": 137, "eval_PERSON_precision": 1.0, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.04495147615671158, "eval_overall_accuracy": 0.9848066298342542, "eval_overall_f1": 0.9353233830845771, "eval_overall_precision": 0.9261083743842364, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2841, "eval_samples_per_second": 598.453, "eval_steps_per_second": 10.561, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.5993749499320984, "learning_rate": 4.1e-05, "loss": 0.046, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.9045226130653266, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.920245398773006, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9433962264150944, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05112641304731369, "eval_overall_accuracy": 0.9861878453038674, "eval_overall_f1": 0.9361702127659574, "eval_overall_precision": 0.9326683291770573, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.286, "eval_samples_per_second": 594.451, "eval_steps_per_second": 10.49, "step": 1728 }, { "epoch": 19.0, "grad_norm": 0.5033515095710754, "learning_rate": 4.05e-05, "loss": 0.042, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.9090909090909091, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8653846153846154, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.8996960486322189, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9135802469135802, "eval_ORGANIZATION_recall": 0.8862275449101796, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05157466232776642, "eval_overall_accuracy": 0.9850828729281768, "eval_overall_f1": 0.9288389513108615, "eval_overall_precision": 0.9230769230769231, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2852, "eval_samples_per_second": 596.106, "eval_steps_per_second": 10.52, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.4982008635997772, "learning_rate": 4e-05, "loss": 0.0423, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.9081632653061226, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8725490196078431, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9179331306990882, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9320987654320988, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.989010989010989, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9926470588235294, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.04306156933307648, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9398496240601504, "eval_overall_precision": 0.9375, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2833, "eval_samples_per_second": 600.113, "eval_steps_per_second": 10.59, "step": 1920 }, { "epoch": 21.0, "grad_norm": 0.7683495879173279, "learning_rate": 3.9500000000000005e-05, "loss": 0.0406, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.9292929292929293, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8846153846153846, "eval_LOCATION_recall": 0.9787234042553191, "eval_ORGANIZATION_f1": 0.9156626506024097, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9212121212121213, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9926470588235294, "eval_PERSON_number": 137, "eval_PERSON_precision": 1.0, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.04385242983698845, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9451371571072318, "eval_overall_precision": 0.9381188118811881, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2855, "eval_samples_per_second": 595.539, "eval_steps_per_second": 10.51, "step": 2016 }, { "epoch": 22.0, "grad_norm": 0.472768098115921, "learning_rate": 3.9000000000000006e-05, "loss": 0.0362, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.9081632653061226, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8725490196078431, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.8975903614457832, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9030303030303031, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9926470588235294, "eval_PERSON_number": 137, "eval_PERSON_precision": 1.0, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.04270213469862938, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9325, "eval_overall_precision": 0.927860696517413, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2828, "eval_samples_per_second": 601.178, "eval_steps_per_second": 10.609, "step": 2112 }, { "epoch": 23.0, "grad_norm": 0.18248304724693298, "learning_rate": 3.85e-05, "loss": 0.0356, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.9183673469387754, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9113149847094801, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.93125, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05271751061081886, "eval_overall_accuracy": 0.9878453038674033, "eval_overall_f1": 0.9360100376411543, "eval_overall_precision": 0.9348370927318296, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.284, "eval_samples_per_second": 598.566, "eval_steps_per_second": 10.563, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.44694140553474426, "learning_rate": 3.8e-05, "loss": 0.0334, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.9278350515463918, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9357798165137615, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.95625, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.04986017942428589, "eval_overall_accuracy": 0.9883977900552486, "eval_overall_f1": 0.9496221662468514, "eval_overall_precision": 0.952020202020202, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2836, "eval_samples_per_second": 599.492, "eval_steps_per_second": 10.579, "step": 2304 }, { "epoch": 25.0, "grad_norm": 0.9199225306510925, "learning_rate": 3.7500000000000003e-05, "loss": 0.0357, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.8975903614457832, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9030303030303031, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.048680081963539124, "eval_overall_accuracy": 0.9850828729281768, "eval_overall_f1": 0.9311639549436797, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2848, "eval_samples_per_second": 596.912, "eval_steps_per_second": 10.534, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.3040350377559662, "learning_rate": 3.7e-05, "loss": 0.035, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.9119170984455959, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9361702127659575, "eval_ORGANIZATION_f1": 0.906906906906907, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9096385542168675, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9710144927536232, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9640287769784173, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.04824273660778999, "eval_overall_accuracy": 0.9859116022099448, "eval_overall_f1": 0.9301745635910225, "eval_overall_precision": 0.9232673267326733, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2838, "eval_samples_per_second": 599.028, "eval_steps_per_second": 10.571, "step": 2496 }, { "epoch": 27.0, "grad_norm": 1.0237709283828735, "learning_rate": 3.65e-05, "loss": 0.0322, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.9166666666666666, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8979591836734694, "eval_LOCATION_recall": 0.9361702127659575, "eval_ORGANIZATION_f1": 0.9156626506024097, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9212121212121213, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.047346312552690506, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9385194479297366, "eval_overall_precision": 0.9373433583959899, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2845, "eval_samples_per_second": 597.52, "eval_steps_per_second": 10.544, "step": 2592 }, { "epoch": 28.0, "grad_norm": 0.8279179930686951, "learning_rate": 3.6e-05, "loss": 0.0313, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9207317073170732, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.937888198757764, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.049984320998191833, "eval_overall_accuracy": 0.987292817679558, "eval_overall_f1": 0.9396984924623115, "eval_overall_precision": 0.9396984924623115, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2831, "eval_samples_per_second": 600.413, "eval_steps_per_second": 10.596, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.41118767857551575, "learning_rate": 3.55e-05, "loss": 0.029, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.9285714285714286, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8921568627450981, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9325153374233129, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9559748427672956, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.989010989010989, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9926470588235294, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.05203305557370186, "eval_overall_accuracy": 0.9878453038674033, "eval_overall_f1": 0.950943396226415, "eval_overall_precision": 0.9521410579345088, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2873, "eval_samples_per_second": 591.682, "eval_steps_per_second": 10.441, "step": 2784 }, { "epoch": 30.0, "grad_norm": 1.1524934768676758, "learning_rate": 3.5e-05, "loss": 0.0282, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.9128205128205128, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9053254437869822, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8947368421052632, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.0583416111767292, "eval_overall_accuracy": 0.9848066298342542, "eval_overall_f1": 0.9343246592317224, "eval_overall_precision": 0.921760391198044, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2834, "eval_samples_per_second": 599.9, "eval_steps_per_second": 10.586, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.43164223432540894, "learning_rate": 3.45e-05, "loss": 0.0298, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.9222797927461138, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.898989898989899, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9107692307692307, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9367088607594937, "eval_ORGANIZATION_recall": 0.8862275449101796, "eval_PERSON_f1": 0.989010989010989, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9926470588235294, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.051134929060935974, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9405815423514539, "eval_overall_precision": 0.9465648854961832, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2848, "eval_samples_per_second": 596.811, "eval_steps_per_second": 10.532, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.5094390511512756, "learning_rate": 3.4000000000000007e-05, "loss": 0.0288, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.9246231155778896, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8761904761904762, "eval_LOCATION_recall": 0.9787234042553191, "eval_ORGANIZATION_f1": 0.9174311926605505, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9375, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05673923343420029, "eval_overall_accuracy": 0.9861878453038674, "eval_overall_f1": 0.94, "eval_overall_precision": 0.9353233830845771, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2842, "eval_samples_per_second": 598.164, "eval_steps_per_second": 10.556, "step": 3072 }, { "epoch": 33.0, "grad_norm": 0.11313607543706894, "learning_rate": 3.35e-05, "loss": 0.0266, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.9166666666666666, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8979591836734694, "eval_LOCATION_recall": 0.9361702127659575, "eval_ORGANIZATION_f1": 0.9146341463414633, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9316770186335404, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.053803134709596634, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9370277078085643, "eval_overall_precision": 0.9393939393939394, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2849, "eval_samples_per_second": 596.711, "eval_steps_per_second": 10.53, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.3134906589984894, "learning_rate": 3.3e-05, "loss": 0.0257, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.9368421052631578, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9270833333333334, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9144542772861356, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9011627906976745, "eval_ORGANIZATION_recall": 0.9281437125748503, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05229897052049637, "eval_overall_accuracy": 0.987292817679558, "eval_overall_f1": 0.9426433915211971, "eval_overall_precision": 0.9356435643564357, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2843, "eval_samples_per_second": 597.982, "eval_steps_per_second": 10.553, "step": 3264 }, { "epoch": 35.0, "grad_norm": 0.1841432750225067, "learning_rate": 3.2500000000000004e-05, "loss": 0.026, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.9230769230769231, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.8902439024390244, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.906832298136646, "eval_ORGANIZATION_recall": 0.874251497005988, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05274403467774391, "eval_overall_accuracy": 0.9856353591160221, "eval_overall_f1": 0.9296482412060302, "eval_overall_precision": 0.9296482412060302, "eval_overall_recall": 0.9296482412060302, "eval_runtime": 0.2843, "eval_samples_per_second": 598.063, "eval_steps_per_second": 10.554, "step": 3360 }, { "epoch": 36.0, "grad_norm": 0.536849319934845, "learning_rate": 3.2000000000000005e-05, "loss": 0.0256, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9244712990936556, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9329268292682927, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.9926470588235294, "eval_PERSON_number": 137, "eval_PERSON_precision": 1.0, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.05037631839513779, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9460476787954831, "eval_overall_precision": 0.9448621553884712, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.283, "eval_samples_per_second": 600.613, "eval_steps_per_second": 10.599, "step": 3456 }, { "epoch": 37.0, "grad_norm": 0.49685901403427124, "learning_rate": 3.15e-05, "loss": 0.023, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.924924924924925, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.927710843373494, "eval_ORGANIZATION_recall": 0.9221556886227545, "eval_PERSON_f1": 0.9745454545454545, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9710144927536232, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05539943650364876, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9401496259351622, "eval_overall_precision": 0.9331683168316832, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2864, "eval_samples_per_second": 593.48, "eval_steps_per_second": 10.473, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.44660648703575134, "learning_rate": 3.1e-05, "loss": 0.0225, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.9326424870466321, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9090909090909091, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9337349397590362, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9393939393939394, "eval_ORGANIZATION_recall": 0.9281437125748503, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.049222446978092194, "eval_overall_accuracy": 0.9883977900552486, "eval_overall_f1": 0.951188986232791, "eval_overall_precision": 0.9476309226932669, "eval_overall_recall": 0.9547738693467337, "eval_runtime": 0.2831, "eval_samples_per_second": 600.431, "eval_steps_per_second": 10.596, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.3947770297527313, "learning_rate": 3.05e-05, "loss": 0.0229, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.9137055837563451, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8737864077669902, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9085365853658537, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9254658385093167, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9852941176470589, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9925925925925926, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05170230194926262, "eval_overall_accuracy": 0.987292817679558, "eval_overall_f1": 0.9360100376411543, "eval_overall_precision": 0.9348370927318296, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2881, "eval_samples_per_second": 590.04, "eval_steps_per_second": 10.412, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.5973528027534485, "learning_rate": 3e-05, "loss": 0.0207, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.9128205128205128, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8811881188118812, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9161676646706587, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9161676646706587, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05221958085894585, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9364881693648817, "eval_overall_precision": 0.928395061728395, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2844, "eval_samples_per_second": 597.749, "eval_steps_per_second": 10.549, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.2345399707555771, "learning_rate": 2.95e-05, "loss": 0.0235, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9191919191919191, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9041916167664671, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9041916167664671, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.0559840090572834, "eval_overall_accuracy": 0.9861878453038674, "eval_overall_f1": 0.9354838709677419, "eval_overall_precision": 0.9240196078431373, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2835, "eval_samples_per_second": 599.648, "eval_steps_per_second": 10.582, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.5430697798728943, "learning_rate": 2.9e-05, "loss": 0.0199, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9285714285714286, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8921568627450981, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9174311926605505, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9375, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.989010989010989, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9926470588235294, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.0529998317360878, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9447236180904522, "eval_overall_precision": 0.9447236180904522, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2806, "eval_samples_per_second": 605.779, "eval_steps_per_second": 10.69, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.4822620749473572, "learning_rate": 2.8499999999999998e-05, "loss": 0.0204, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9080118694362018, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.989010989010989, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9926470588235294, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.05750620365142822, "eval_overall_accuracy": 0.9859116022099448, "eval_overall_f1": 0.937810945273632, "eval_overall_precision": 0.9285714285714286, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2858, "eval_samples_per_second": 594.727, "eval_steps_per_second": 10.495, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.27013102173805237, "learning_rate": 2.8000000000000003e-05, "loss": 0.0201, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.91, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8584905660377359, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.8916408668730652, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9230769230769231, "eval_ORGANIZATION_recall": 0.8622754491017964, "eval_PERSON_f1": 0.9852941176470589, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9925925925925926, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.062020864337682724, "eval_overall_accuracy": 0.9856353591160221, "eval_overall_f1": 0.9283018867924528, "eval_overall_precision": 0.929471032745592, "eval_overall_recall": 0.9271356783919598, "eval_runtime": 0.285, "eval_samples_per_second": 596.469, "eval_steps_per_second": 10.526, "step": 4224 }, { "epoch": 45.0, "grad_norm": 0.11741367727518082, "learning_rate": 2.7500000000000004e-05, "loss": 0.0202, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.9326424870466321, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9090909090909091, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9096385542168675, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.049684587866067886, "eval_overall_accuracy": 0.987292817679558, "eval_overall_f1": 0.9411764705882354, "eval_overall_precision": 0.9376558603491272, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2842, "eval_samples_per_second": 598.094, "eval_steps_per_second": 10.555, "step": 4320 }, { "epoch": 46.0, "grad_norm": 0.5550666451454163, "learning_rate": 2.7000000000000002e-05, "loss": 0.0199, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.9222797927461138, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.898989898989899, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.918429003021148, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.926829268292683, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.047310564666986465, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9423558897243106, "eval_overall_precision": 0.94, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2869, "eval_samples_per_second": 592.503, "eval_steps_per_second": 10.456, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.6427354216575623, "learning_rate": 2.6500000000000004e-05, "loss": 0.0194, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.9199999999999999, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8679245283018868, "eval_LOCATION_recall": 0.9787234042553191, "eval_ORGANIZATION_f1": 0.9123867069486405, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9207317073170732, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.989010989010989, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9926470588235294, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.055126290768384933, "eval_overall_accuracy": 0.9861878453038674, "eval_overall_f1": 0.9402985074626866, "eval_overall_precision": 0.9310344827586207, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.283, "eval_samples_per_second": 600.676, "eval_steps_per_second": 10.6, "step": 4512 }, { "epoch": 48.0, "grad_norm": 0.5357906222343445, "learning_rate": 2.6000000000000002e-05, "loss": 0.0177, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.9333333333333335, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9202453987730062, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.05755533277988434, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9411764705882354, "eval_overall_precision": 0.9376558603491272, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2857, "eval_samples_per_second": 595.094, "eval_steps_per_second": 10.502, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.09014078229665756, "learning_rate": 2.5500000000000003e-05, "loss": 0.0171, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.9191919191919191, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9085365853658537, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9254658385093167, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.06671103090047836, "eval_overall_accuracy": 0.9859116022099448, "eval_overall_f1": 0.9375, "eval_overall_precision": 0.9328358208955224, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.285, "eval_samples_per_second": 596.481, "eval_steps_per_second": 10.526, "step": 4704 }, { "epoch": 50.0, "grad_norm": 0.554146945476532, "learning_rate": 2.5e-05, "loss": 0.0197, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.9292929292929293, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8846153846153846, "eval_LOCATION_recall": 0.9787234042553191, "eval_ORGANIZATION_f1": 0.905775075987842, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9197530864197531, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.0641038790345192, "eval_overall_accuracy": 0.9850828729281768, "eval_overall_f1": 0.9388264669163546, "eval_overall_precision": 0.9330024813895782, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2851, "eval_samples_per_second": 596.366, "eval_steps_per_second": 10.524, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.009796121157705784, "learning_rate": 2.45e-05, "loss": 0.0166, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.9191919191919191, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9113149847094801, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.93125, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.0641901046037674, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9386733416770965, "eval_overall_precision": 0.9351620947630923, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2856, "eval_samples_per_second": 595.265, "eval_steps_per_second": 10.505, "step": 4896 }, { "epoch": 52.0, "grad_norm": 0.15604431927204132, "learning_rate": 2.4e-05, "loss": 0.0178, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.9109947643979057, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8969072164948454, "eval_LOCATION_recall": 0.925531914893617, "eval_ORGANIZATION_f1": 0.9074626865671641, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9047619047619048, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.059049077332019806, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9325, "eval_overall_precision": 0.927860696517413, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2841, "eval_samples_per_second": 598.357, "eval_steps_per_second": 10.559, "step": 4992 }, { "epoch": 53.0, "grad_norm": 0.45889243483543396, "learning_rate": 2.35e-05, "loss": 0.0175, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.9238578680203046, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.883495145631068, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9079754601226994, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9308176100628931, "eval_ORGANIZATION_recall": 0.8862275449101796, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06729099154472351, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9360100376411543, "eval_overall_precision": 0.9348370927318296, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2862, "eval_samples_per_second": 593.986, "eval_steps_per_second": 10.482, "step": 5088 }, { "epoch": 54.0, "grad_norm": 0.344496488571167, "learning_rate": 2.3000000000000003e-05, "loss": 0.0161, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.9137055837563451, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8737864077669902, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.8996960486322189, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9135802469135802, "eval_ORGANIZATION_recall": 0.8862275449101796, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.05877530574798584, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9325, "eval_overall_precision": 0.927860696517413, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2845, "eval_samples_per_second": 597.533, "eval_steps_per_second": 10.545, "step": 5184 }, { "epoch": 55.0, "grad_norm": 0.6249045133590698, "learning_rate": 2.25e-05, "loss": 0.0155, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9230769230769231, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.888888888888889, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.891566265060241, "eval_ORGANIZATION_recall": 0.8862275449101796, "eval_PERSON_f1": 0.9854014598540146, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9854014598540146, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.06252730637788773, "eval_overall_accuracy": 0.9864640883977901, "eval_overall_f1": 0.9301745635910225, "eval_overall_precision": 0.9232673267326733, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2834, "eval_samples_per_second": 599.855, "eval_steps_per_second": 10.586, "step": 5280 }, { "epoch": 56.0, "grad_norm": 1.1858857870101929, "learning_rate": 2.2000000000000003e-05, "loss": 0.017, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9081632653061226, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8725490196078431, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9036144578313253, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.989010989010989, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9926470588235294, "eval_PERSON_recall": 0.9854014598540146, "eval_loss": 0.05949300900101662, "eval_overall_accuracy": 0.9864640883977901, "eval_overall_f1": 0.9338327091136079, "eval_overall_precision": 0.9280397022332506, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2854, "eval_samples_per_second": 595.649, "eval_steps_per_second": 10.511, "step": 5376 }, { "epoch": 57.0, "grad_norm": 0.5108211040496826, "learning_rate": 2.15e-05, "loss": 0.016, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9238578680203046, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.883495145631068, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9207317073170732, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.937888198757764, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05901944637298584, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9423558897243106, "eval_overall_precision": 0.94, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2836, "eval_samples_per_second": 599.434, "eval_steps_per_second": 10.578, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.23780736327171326, "learning_rate": 2.1e-05, "loss": 0.0148, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9081632653061225, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9207317073170732, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.937888198757764, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.059471599757671356, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9420654911838791, "eval_overall_precision": 0.9444444444444444, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.285, "eval_samples_per_second": 596.511, "eval_steps_per_second": 10.527, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.6566023826599121, "learning_rate": 2.05e-05, "loss": 0.0148, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.9375000000000001, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9183673469387755, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.918429003021148, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.926829268292683, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05748390778899193, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9435382685069008, "eval_overall_precision": 0.9423558897243107, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2852, "eval_samples_per_second": 596.035, "eval_steps_per_second": 10.518, "step": 5664 }, { "epoch": 60.0, "grad_norm": 1.078636884689331, "learning_rate": 2e-05, "loss": 0.0121, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.9109947643979057, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8969072164948454, "eval_LOCATION_recall": 0.925531914893617, "eval_ORGANIZATION_f1": 0.9194029850746269, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9166666666666666, "eval_ORGANIZATION_recall": 0.9221556886227545, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06202729046344757, "eval_overall_accuracy": 0.9864640883977901, "eval_overall_f1": 0.9375, "eval_overall_precision": 0.9328358208955224, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2855, "eval_samples_per_second": 595.438, "eval_steps_per_second": 10.508, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.41355055570602417, "learning_rate": 1.9500000000000003e-05, "loss": 0.0159, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9214659685863875, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9072164948453608, "eval_LOCATION_recall": 0.9361702127659575, "eval_ORGANIZATION_f1": 0.9161676646706587, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9161676646706587, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06286400556564331, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9386733416770965, "eval_overall_precision": 0.9351620947630923, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2844, "eval_samples_per_second": 597.713, "eval_steps_per_second": 10.548, "step": 5856 }, { "epoch": 62.0, "grad_norm": 0.3999706208705902, "learning_rate": 1.9e-05, "loss": 0.0155, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9333333333333335, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9146341463414633, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9316770186335404, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06919211149215698, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9410288582183186, "eval_overall_precision": 0.9398496240601504, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2826, "eval_samples_per_second": 601.473, "eval_steps_per_second": 10.614, "step": 5952 }, { "epoch": 63.0, "grad_norm": 0.17356404662132263, "learning_rate": 1.85e-05, "loss": 0.0143, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.8958333333333333, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8775510204081632, "eval_LOCATION_recall": 0.9148936170212766, "eval_ORGANIZATION_f1": 0.9074626865671641, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9047619047619048, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06559401750564575, "eval_overall_accuracy": 0.9859116022099448, "eval_overall_f1": 0.9288389513108615, "eval_overall_precision": 0.9230769230769231, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2831, "eval_samples_per_second": 600.394, "eval_steps_per_second": 10.595, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.09309961646795273, "learning_rate": 1.8e-05, "loss": 0.0137, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9424083769633509, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9278350515463918, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9118541033434651, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9259259259259259, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06567072868347168, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9420654911838791, "eval_overall_precision": 0.9444444444444444, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2841, "eval_samples_per_second": 598.33, "eval_steps_per_second": 10.559, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.10257239639759064, "learning_rate": 1.75e-05, "loss": 0.0135, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9109947643979057, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8969072164948454, "eval_LOCATION_recall": 0.925531914893617, "eval_ORGANIZATION_f1": 0.918429003021148, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.926829268292683, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06831258535385132, "eval_overall_accuracy": 0.9859116022099448, "eval_overall_f1": 0.9371859296482412, "eval_overall_precision": 0.9371859296482412, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2845, "eval_samples_per_second": 597.605, "eval_steps_per_second": 10.546, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.4253714084625244, "learning_rate": 1.7000000000000003e-05, "loss": 0.0128, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9118541033434651, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9259259259259259, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9708029197080292, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9708029197080292, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.06572767347097397, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9385194479297366, "eval_overall_precision": 0.9373433583959899, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2845, "eval_samples_per_second": 597.515, "eval_steps_per_second": 10.544, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.0277443528175354, "learning_rate": 1.65e-05, "loss": 0.0142, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9424083769633509, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9278350515463918, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9244712990936556, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9329268292682927, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06505744904279709, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9472361809045227, "eval_overall_precision": 0.9472361809045227, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2868, "eval_samples_per_second": 592.666, "eval_steps_per_second": 10.459, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.0382203571498394, "learning_rate": 1.6000000000000003e-05, "loss": 0.0127, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9157894736842105, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.90625, "eval_LOCATION_recall": 0.925531914893617, "eval_ORGANIZATION_f1": 0.9129129129129129, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06681419909000397, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9371859296482412, "eval_overall_precision": 0.9371859296482412, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2834, "eval_samples_per_second": 599.927, "eval_steps_per_second": 10.587, "step": 6528 }, { "epoch": 69.0, "grad_norm": 0.3297584056854248, "learning_rate": 1.55e-05, "loss": 0.0129, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.9333333333333335, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9051987767584098, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.925, "eval_ORGANIZATION_recall": 0.8862275449101796, "eval_PERSON_f1": 0.9743589743589743, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9779411764705882, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.06309264898300171, "eval_overall_accuracy": 0.9878453038674033, "eval_overall_f1": 0.9358490566037736, "eval_overall_precision": 0.9370277078085643, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2849, "eval_samples_per_second": 596.656, "eval_steps_per_second": 10.529, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.6310251355171204, "learning_rate": 1.5e-05, "loss": 0.0125, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9118541033434651, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9259259259259259, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06364177167415619, "eval_overall_accuracy": 0.987292817679558, "eval_overall_f1": 0.9360100376411543, "eval_overall_precision": 0.9348370927318296, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2848, "eval_samples_per_second": 596.883, "eval_steps_per_second": 10.533, "step": 6720 }, { "epoch": 71.0, "grad_norm": 1.16694974899292, "learning_rate": 1.45e-05, "loss": 0.0119, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9305135951661632, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9390243902439024, "eval_ORGANIZATION_recall": 0.9221556886227545, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06310304254293442, "eval_overall_accuracy": 0.9886740331491712, "eval_overall_f1": 0.9498746867167919, "eval_overall_precision": 0.9475, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2822, "eval_samples_per_second": 602.389, "eval_steps_per_second": 10.63, "step": 6816 }, { "epoch": 72.0, "grad_norm": 1.1335972547531128, "learning_rate": 1.4000000000000001e-05, "loss": 0.0111, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9221556886227545, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9221556886227545, "eval_ORGANIZATION_recall": 0.9221556886227545, "eval_PERSON_f1": 0.9779411764705882, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9851851851851852, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.06212692707777023, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9450000000000001, "eval_overall_precision": 0.9402985074626866, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2849, "eval_samples_per_second": 596.755, "eval_steps_per_second": 10.531, "step": 6912 }, { "epoch": 73.0, "grad_norm": 0.1433609277009964, "learning_rate": 1.3500000000000001e-05, "loss": 0.0121, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9368421052631578, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9270833333333334, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.927710843373494, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9333333333333333, "eval_ORGANIZATION_recall": 0.9221556886227545, "eval_PERSON_f1": 0.9743589743589743, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9779411764705882, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.0616704598069191, "eval_overall_accuracy": 0.988950276243094, "eval_overall_f1": 0.9459119496855346, "eval_overall_precision": 0.947103274559194, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2862, "eval_samples_per_second": 593.888, "eval_steps_per_second": 10.48, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.7654500603675842, "learning_rate": 1.3000000000000001e-05, "loss": 0.0114, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.9326424870466321, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9090909090909091, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9156626506024097, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9212121212121213, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06411295384168625, "eval_overall_accuracy": 0.9883977900552486, "eval_overall_f1": 0.9423558897243106, "eval_overall_precision": 0.94, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2839, "eval_samples_per_second": 598.907, "eval_steps_per_second": 10.569, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.044443562626838684, "learning_rate": 1.25e-05, "loss": 0.0109, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9216867469879517, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9272727272727272, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06598102301359177, "eval_overall_accuracy": 0.9878453038674033, "eval_overall_f1": 0.9450000000000001, "eval_overall_precision": 0.9402985074626866, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2932, "eval_samples_per_second": 579.866, "eval_steps_per_second": 10.233, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.2716529071331024, "learning_rate": 1.2e-05, "loss": 0.0105, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9417989417989417, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9368421052631579, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.9189189189189191, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9216867469879518, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.0662764459848404, "eval_overall_accuracy": 0.9878453038674033, "eval_overall_f1": 0.9459119496855346, "eval_overall_precision": 0.947103274559194, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2824, "eval_samples_per_second": 601.9, "eval_steps_per_second": 10.622, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.08800482749938965, "learning_rate": 1.1500000000000002e-05, "loss": 0.01, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.9326424870466321, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9090909090909091, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9063444108761329, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9146341463414634, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06778395175933838, "eval_overall_accuracy": 0.9864640883977901, "eval_overall_f1": 0.93734335839599, "eval_overall_precision": 0.935, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2852, "eval_samples_per_second": 596.163, "eval_steps_per_second": 10.521, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.17953966557979584, "learning_rate": 1.1000000000000001e-05, "loss": 0.0102, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9319371727748691, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9175257731958762, "eval_LOCATION_recall": 0.9468085106382979, "eval_ORGANIZATION_f1": 0.906906906906907, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9096385542168675, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9852941176470589, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9925925925925926, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06711044162511826, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9396984924623115, "eval_overall_precision": 0.9396984924623115, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2836, "eval_samples_per_second": 599.334, "eval_steps_per_second": 10.576, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.45518046617507935, "learning_rate": 1.05e-05, "loss": 0.0095, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9424083769633509, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9278350515463918, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9221556886227545, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9221556886227545, "eval_ORGANIZATION_recall": 0.9221556886227545, "eval_PERSON_f1": 0.9779411764705882, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9851851851851852, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.06550143659114838, "eval_overall_accuracy": 0.988950276243094, "eval_overall_f1": 0.9460476787954831, "eval_overall_precision": 0.9448621553884712, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2909, "eval_samples_per_second": 584.434, "eval_steps_per_second": 10.314, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.49637171626091003, "learning_rate": 1e-05, "loss": 0.0104, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.8934010152284263, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8543689320388349, "eval_LOCATION_recall": 0.9361702127659575, "eval_ORGANIZATION_f1": 0.9123867069486405, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9207317073170732, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9743589743589743, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9779411764705882, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.0712551549077034, "eval_overall_accuracy": 0.9859116022099448, "eval_overall_f1": 0.9288389513108615, "eval_overall_precision": 0.9230769230769231, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2859, "eval_samples_per_second": 594.515, "eval_steps_per_second": 10.491, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.8264366984367371, "learning_rate": 9.5e-06, "loss": 0.011, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9278350515463918, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9096385542168675, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9852941176470589, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9925925925925926, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06782893091440201, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9398496240601504, "eval_overall_precision": 0.9375, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2843, "eval_samples_per_second": 598.061, "eval_steps_per_second": 10.554, "step": 7776 }, { "epoch": 82.0, "grad_norm": 0.017817312851548195, "learning_rate": 9e-06, "loss": 0.0102, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9278350515463918, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9156626506024097, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9212121212121213, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06931216269731522, "eval_overall_accuracy": 0.987292817679558, "eval_overall_f1": 0.9411764705882354, "eval_overall_precision": 0.9376558603491272, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2849, "eval_samples_per_second": 596.791, "eval_steps_per_second": 10.532, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.10394305735826492, "learning_rate": 8.500000000000002e-06, "loss": 0.0092, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9230769230769231, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.906906906906907, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9096385542168675, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9743589743589743, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9779411764705882, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.06826651841402054, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9338327091136079, "eval_overall_precision": 0.9280397022332506, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2842, "eval_samples_per_second": 598.171, "eval_steps_per_second": 10.556, "step": 7968 }, { "epoch": 84.0, "grad_norm": 0.02320408634841442, "learning_rate": 8.000000000000001e-06, "loss": 0.009, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.9230769230769231, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8910891089108911, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9096385542168675, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07002082467079163, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9375, "eval_overall_precision": 0.9328358208955224, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2819, "eval_samples_per_second": 603.102, "eval_steps_per_second": 10.643, "step": 8064 }, { "epoch": 85.0, "grad_norm": 0.1980428844690323, "learning_rate": 7.5e-06, "loss": 0.0087, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9424083769633509, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9278350515463918, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9272727272727274, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9386503067484663, "eval_ORGANIZATION_recall": 0.9161676646706587, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06669559329748154, "eval_overall_accuracy": 0.9886740331491712, "eval_overall_f1": 0.9496221662468514, "eval_overall_precision": 0.952020202020202, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2851, "eval_samples_per_second": 596.211, "eval_steps_per_second": 10.521, "step": 8160 }, { "epoch": 86.0, "grad_norm": 1.0510958433151245, "learning_rate": 7.000000000000001e-06, "loss": 0.0091, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9430051813471503, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9191919191919192, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9207317073170732, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.937888198757764, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.06715427339076996, "eval_overall_accuracy": 0.9886740331491712, "eval_overall_f1": 0.9459119496855346, "eval_overall_precision": 0.947103274559194, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2841, "eval_samples_per_second": 598.281, "eval_steps_per_second": 10.558, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.7366644740104675, "learning_rate": 6.5000000000000004e-06, "loss": 0.0101, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9183673469387754, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9096385542168675, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9743589743589743, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9779411764705882, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.068853460252285, "eval_overall_accuracy": 0.9867403314917127, "eval_overall_f1": 0.9338327091136079, "eval_overall_precision": 0.9280397022332506, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2824, "eval_samples_per_second": 601.979, "eval_steps_per_second": 10.623, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.09895297884941101, "learning_rate": 6e-06, "loss": 0.009, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9430051813471503, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9191919191919192, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9151515151515152, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9263803680981595, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9743589743589743, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9779411764705882, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.06802190095186234, "eval_overall_accuracy": 0.9881215469613259, "eval_overall_f1": 0.9422110552763819, "eval_overall_precision": 0.9422110552763819, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2824, "eval_samples_per_second": 602.036, "eval_steps_per_second": 10.624, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.9783310890197754, "learning_rate": 5.500000000000001e-06, "loss": 0.0102, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9285714285714286, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8921568627450981, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.905775075987842, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9197530864197531, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07053259760141373, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9361702127659574, "eval_overall_precision": 0.9326683291770573, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2837, "eval_samples_per_second": 599.311, "eval_steps_per_second": 10.576, "step": 8544 }, { "epoch": 90.0, "grad_norm": 0.4508645236492157, "learning_rate": 5e-06, "loss": 0.0082, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9333333333333335, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9063444108761329, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9146341463414634, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9708029197080292, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9708029197080292, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.06930597126483917, "eval_overall_accuracy": 0.9870165745856354, "eval_overall_f1": 0.9349999999999999, "eval_overall_precision": 0.9303482587064676, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2849, "eval_samples_per_second": 596.782, "eval_steps_per_second": 10.531, "step": 8640 }, { "epoch": 91.0, "grad_norm": 0.42777031660079956, "learning_rate": 4.5e-06, "loss": 0.01, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9333333333333335, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9118541033434651, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9259259259259259, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07097171992063522, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9398496240601504, "eval_overall_precision": 0.9375, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2817, "eval_samples_per_second": 603.418, "eval_steps_per_second": 10.649, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.27081435918807983, "learning_rate": 4.000000000000001e-06, "loss": 0.0072, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9183673469387754, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.9574468085106383, "eval_ORGANIZATION_f1": 0.9151515151515152, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9263803680981595, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07146384567022324, "eval_overall_accuracy": 0.987292817679558, "eval_overall_f1": 0.9375, "eval_overall_precision": 0.9328358208955224, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2844, "eval_samples_per_second": 597.698, "eval_steps_per_second": 10.548, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.448527067899704, "learning_rate": 3.5000000000000004e-06, "loss": 0.007, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9333333333333335, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.900990099009901, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9179331306990882, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9320987654320988, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07204049080610275, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9423558897243106, "eval_overall_precision": 0.94, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2824, "eval_samples_per_second": 601.964, "eval_steps_per_second": 10.623, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.19812382757663727, "learning_rate": 3e-06, "loss": 0.0083, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9174311926605505, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9375, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07203315198421478, "eval_overall_accuracy": 0.9878453038674033, "eval_overall_f1": 0.9433962264150944, "eval_overall_precision": 0.9445843828715366, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2844, "eval_samples_per_second": 597.806, "eval_steps_per_second": 10.55, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.02685665898025036, "learning_rate": 2.5e-06, "loss": 0.0085, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9146341463414633, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9316770186335404, "eval_ORGANIZATION_recall": 0.8982035928143712, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07053723931312561, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9422110552763819, "eval_overall_precision": 0.9422110552763819, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2842, "eval_samples_per_second": 598.152, "eval_steps_per_second": 10.556, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.053834784775972366, "learning_rate": 2.0000000000000003e-06, "loss": 0.0087, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9179331306990882, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9320987654320988, "eval_ORGANIZATION_recall": 0.9041916167664671, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07113828510046005, "eval_overall_accuracy": 0.9878453038674033, "eval_overall_f1": 0.9435382685069008, "eval_overall_precision": 0.9423558897243107, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2827, "eval_samples_per_second": 601.439, "eval_steps_per_second": 10.614, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.7458974123001099, "learning_rate": 1.5e-06, "loss": 0.0079, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9051987767584098, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.925, "eval_ORGANIZATION_recall": 0.8862275449101796, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07249590009450912, "eval_overall_accuracy": 0.987292817679558, "eval_overall_f1": 0.9383647798742139, "eval_overall_precision": 0.9395465994962217, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2845, "eval_samples_per_second": 597.636, "eval_steps_per_second": 10.547, "step": 9312 }, { "epoch": 98.0, "grad_norm": 0.00920681469142437, "learning_rate": 1.0000000000000002e-06, "loss": 0.0072, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9085365853658537, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9254658385093167, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07184883952140808, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9396984924623115, "eval_overall_precision": 0.9396984924623115, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2847, "eval_samples_per_second": 597.086, "eval_steps_per_second": 10.537, "step": 9408 }, { "epoch": 99.0, "grad_norm": 0.07304101437330246, "learning_rate": 5.000000000000001e-07, "loss": 0.0087, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9085365853658537, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9254658385093167, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07170115411281586, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9396984924623115, "eval_overall_precision": 0.9396984924623115, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2853, "eval_samples_per_second": 595.791, "eval_steps_per_second": 10.514, "step": 9504 }, { "epoch": 100.0, "grad_norm": 0.02175908349454403, "learning_rate": 0.0, "loss": 0.0086, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9381443298969071, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.91, "eval_LOCATION_recall": 0.9680851063829787, "eval_ORGANIZATION_f1": 0.9085365853658537, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9254658385093167, "eval_ORGANIZATION_recall": 0.8922155688622755, "eval_PERSON_f1": 0.9781021897810219, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9781021897810219, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.07141576707363129, "eval_overall_accuracy": 0.9875690607734806, "eval_overall_f1": 0.9396984924623115, "eval_overall_precision": 0.9396984924623115, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.29, "eval_samples_per_second": 586.299, "eval_steps_per_second": 10.346, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 4297274884593540.0, "train_loss": 0.03890133989353975, "train_runtime": 528.3083, "train_samples_per_second": 289.604, "train_steps_per_second": 18.171 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 4297274884593540.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }