{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.9398887157440186, "learning_rate": 4.9500000000000004e-05, "loss": 1.0754, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.0, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.0, "eval_ORGANIZATION_recall": 0.0, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.6776708960533142, "eval_overall_accuracy": 0.8373377520022093, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, "eval_runtime": 0.2714, "eval_samples_per_second": 626.324, "eval_steps_per_second": 11.053, "step": 96 }, { "epoch": 2.0, "grad_norm": 0.9052129983901978, "learning_rate": 4.9e-05, "loss": 0.6397, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.0, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.0, "eval_ORGANIZATION_recall": 0.0, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.5209642052650452, "eval_overall_accuracy": 0.8381662524164596, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, "eval_runtime": 0.2787, "eval_samples_per_second": 610.043, "eval_steps_per_second": 10.765, "step": 192 }, { "epoch": 3.0, "grad_norm": 1.0271474123001099, "learning_rate": 4.85e-05, "loss": 0.4876, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.10526315789473682, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.23333333333333334, "eval_LOCATION_recall": 0.06796116504854369, "eval_ORGANIZATION_f1": 0.23741007194244604, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.308411214953271, "eval_ORGANIZATION_recall": 0.19298245614035087, "eval_PERSON_f1": 0.27715355805243447, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.27205882352941174, "eval_PERSON_recall": 0.2824427480916031, "eval_loss": 0.3654983937740326, "eval_overall_accuracy": 0.8732394366197183, "eval_overall_f1": 0.22713864306784662, "eval_overall_precision": 0.28205128205128205, "eval_overall_recall": 0.19012345679012346, "eval_runtime": 0.2739, "eval_samples_per_second": 620.598, "eval_steps_per_second": 10.952, "step": 288 }, { "epoch": 4.0, "grad_norm": 1.1348038911819458, "learning_rate": 4.8e-05, "loss": 0.3447, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.37433155080213903, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.4166666666666667, "eval_LOCATION_recall": 0.33980582524271846, "eval_ORGANIZATION_f1": 0.5520833333333333, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.49765258215962443, "eval_ORGANIZATION_recall": 0.6198830409356725, "eval_PERSON_f1": 0.5512820512820513, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.47513812154696133, "eval_PERSON_recall": 0.6564885496183206, "eval_loss": 0.2575790286064148, "eval_overall_accuracy": 0.9226732946699807, "eval_overall_f1": 0.5141562853907135, "eval_overall_precision": 0.47489539748953974, "eval_overall_recall": 0.5604938271604938, "eval_runtime": 0.2776, "eval_samples_per_second": 612.479, "eval_steps_per_second": 10.808, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.6668264269828796, "learning_rate": 4.75e-05, "loss": 0.2489, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.5333333333333333, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.5652173913043478, "eval_LOCATION_recall": 0.5048543689320388, "eval_ORGANIZATION_f1": 0.6979166666666667, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.6291079812206573, "eval_ORGANIZATION_recall": 0.783625730994152, "eval_PERSON_f1": 0.7847222222222221, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.7197452229299363, "eval_PERSON_recall": 0.8625954198473282, "eval_loss": 0.18802693486213684, "eval_overall_accuracy": 0.9483568075117371, "eval_overall_f1": 0.6897347174163783, "eval_overall_precision": 0.6471861471861472, "eval_overall_recall": 0.7382716049382716, "eval_runtime": 0.2785, "eval_samples_per_second": 610.331, "eval_steps_per_second": 10.771, "step": 480 }, { "epoch": 6.0, "grad_norm": 1.4561339616775513, "learning_rate": 4.7e-05, "loss": 0.1922, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.7378640776699028, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.7378640776699029, "eval_LOCATION_recall": 0.7378640776699029, "eval_ORGANIZATION_f1": 0.7933884297520661, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.75, "eval_ORGANIZATION_recall": 0.8421052631578947, "eval_PERSON_f1": 0.9191176470588235, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.8865248226950354, "eval_PERSON_recall": 0.9541984732824428, "eval_loss": 0.14182497560977936, "eval_overall_accuracy": 0.9635459817729909, "eval_overall_f1": 0.8204518430439953, "eval_overall_precision": 0.7912844036697247, "eval_overall_recall": 0.8518518518518519, "eval_runtime": 0.2785, "eval_samples_per_second": 610.351, "eval_steps_per_second": 10.771, "step": 576 }, { "epoch": 7.0, "grad_norm": 1.417574405670166, "learning_rate": 4.6500000000000005e-05, "loss": 0.1585, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.76056338028169, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.7363636363636363, "eval_LOCATION_recall": 0.7864077669902912, "eval_ORGANIZATION_f1": 0.8, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7525773195876289, "eval_ORGANIZATION_recall": 0.8538011695906432, "eval_PERSON_f1": 0.929889298892989, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.11958593130111694, "eval_overall_accuracy": 0.963822148577741, "eval_overall_f1": 0.8315665488810364, "eval_overall_precision": 0.795045045045045, "eval_overall_recall": 0.8716049382716049, "eval_runtime": 0.2751, "eval_samples_per_second": 617.859, "eval_steps_per_second": 10.903, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.9025856852531433, "learning_rate": 4.600000000000001e-05, "loss": 0.1378, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.8058252427184465, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8058252427184466, "eval_LOCATION_recall": 0.8058252427184466, "eval_ORGANIZATION_f1": 0.8209366391184574, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7760416666666666, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9477611940298507, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.927007299270073, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.101407490670681, "eval_overall_accuracy": 0.9696216514774924, "eval_overall_f1": 0.8578255675029869, "eval_overall_precision": 0.8310185185185185, "eval_overall_recall": 0.8864197530864197, "eval_runtime": 0.2751, "eval_samples_per_second": 617.954, "eval_steps_per_second": 10.905, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.8474039435386658, "learning_rate": 4.55e-05, "loss": 0.1249, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.8229665071770335, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8113207547169812, "eval_LOCATION_recall": 0.8349514563106796, "eval_ORGANIZATION_f1": 0.8121546961325967, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7696335078534031, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9442379182156133, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9202898550724637, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.09287155419588089, "eval_overall_accuracy": 0.9712786523059929, "eval_overall_f1": 0.8571428571428572, "eval_overall_precision": 0.8275862068965517, "eval_overall_recall": 0.8888888888888888, "eval_runtime": 0.2744, "eval_samples_per_second": 619.563, "eval_steps_per_second": 10.933, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.8552592396736145, "learning_rate": 4.5e-05, "loss": 0.1157, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.8333333333333333, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8415841584158416, "eval_LOCATION_recall": 0.8252427184466019, "eval_ORGANIZATION_f1": 0.8296703296703298, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7823834196891192, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9333333333333332, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9064748201438849, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08417543768882751, "eval_overall_accuracy": 0.971830985915493, "eval_overall_f1": 0.8639618138424822, "eval_overall_precision": 0.836027713625866, "eval_overall_recall": 0.8938271604938272, "eval_runtime": 0.2787, "eval_samples_per_second": 609.919, "eval_steps_per_second": 10.763, "step": 960 }, { "epoch": 11.0, "grad_norm": 1.274109959602356, "learning_rate": 4.4500000000000004e-05, "loss": 0.106, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.8502415458937198, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8461538461538461, "eval_LOCATION_recall": 0.8543689320388349, "eval_ORGANIZATION_f1": 0.8271954674220962, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8021978021978022, "eval_ORGANIZATION_recall": 0.8538011695906432, "eval_PERSON_f1": 0.9509433962264152, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9402985074626866, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.07744017243385315, "eval_overall_accuracy": 0.9751449875724938, "eval_overall_f1": 0.8727272727272727, "eval_overall_precision": 0.8571428571428571, "eval_overall_recall": 0.8888888888888888, "eval_runtime": 0.2742, "eval_samples_per_second": 620.087, "eval_steps_per_second": 10.943, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.8402628898620605, "learning_rate": 4.4000000000000006e-05, "loss": 0.1021, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.8666666666666667, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8504672897196262, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.84593837535014, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8118279569892473, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9333333333333332, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9064748201438849, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.0777238979935646, "eval_overall_accuracy": 0.9754211543772439, "eval_overall_f1": 0.879330943847073, "eval_overall_precision": 0.8518518518518519, "eval_overall_recall": 0.908641975308642, "eval_runtime": 0.2759, "eval_samples_per_second": 616.061, "eval_steps_per_second": 10.872, "step": 1152 }, { "epoch": 13.0, "grad_norm": 2.429759979248047, "learning_rate": 4.35e-05, "loss": 0.0959, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.8695652173913043, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8653846153846154, "eval_LOCATION_recall": 0.8737864077669902, "eval_ORGANIZATION_f1": 0.8753623188405797, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.867816091954023, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9545454545454546, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9473684210526315, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.0699063166975975, "eval_overall_accuracy": 0.9790113228389947, "eval_overall_f1": 0.8995098039215687, "eval_overall_precision": 0.8929440389294404, "eval_overall_recall": 0.9061728395061729, "eval_runtime": 0.2744, "eval_samples_per_second": 619.503, "eval_steps_per_second": 10.932, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.5146805644035339, "learning_rate": 4.3e-05, "loss": 0.0915, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.8750000000000001, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8666666666666667, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8672566371681416, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.875, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9509433962264152, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9402985074626866, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.06974173337221146, "eval_overall_accuracy": 0.9787351560342447, "eval_overall_f1": 0.8965517241379312, "eval_overall_precision": 0.8943488943488943, "eval_overall_recall": 0.8987654320987655, "eval_runtime": 0.2722, "eval_samples_per_second": 624.642, "eval_steps_per_second": 11.023, "step": 1344 }, { "epoch": 15.0, "grad_norm": 0.906639039516449, "learning_rate": 4.25e-05, "loss": 0.0875, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.8792270531400966, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.883495145631068, "eval_ORGANIZATION_f1": 0.8698060941828255, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8263157894736842, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9473684210526315, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9333333333333333, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.0640181228518486, "eval_overall_accuracy": 0.9784589892294946, "eval_overall_f1": 0.8968824940047962, "eval_overall_precision": 0.8717948717948718, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2737, "eval_samples_per_second": 621.084, "eval_steps_per_second": 10.96, "step": 1440 }, { "epoch": 16.0, "grad_norm": 1.1430482864379883, "learning_rate": 4.2e-05, "loss": 0.0837, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.8867924528301887, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8623853211009175, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8627450980392157, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8279569892473119, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.06158023327589035, "eval_overall_accuracy": 0.9795636564484949, "eval_overall_f1": 0.8992805755395683, "eval_overall_precision": 0.8741258741258742, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.279, "eval_samples_per_second": 609.346, "eval_steps_per_second": 10.753, "step": 1536 }, { "epoch": 17.0, "grad_norm": 0.754805326461792, "learning_rate": 4.15e-05, "loss": 0.0799, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.8985507246376813, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8942307692307693, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.8739495798319329, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8387096774193549, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.057049937546253204, "eval_overall_accuracy": 0.9803921568627451, "eval_overall_f1": 0.9082125603864735, "eval_overall_precision": 0.8888888888888888, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2734, "eval_samples_per_second": 621.802, "eval_steps_per_second": 10.973, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.9037827849388123, "learning_rate": 4.1e-05, "loss": 0.0763, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.8952380952380954, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8785046728971962, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8799999999999999, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8603351955307262, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.05494200810790062, "eval_overall_accuracy": 0.9806683236674951, "eval_overall_f1": 0.9124087591240877, "eval_overall_precision": 0.8992805755395683, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2738, "eval_samples_per_second": 620.831, "eval_steps_per_second": 10.956, "step": 1728 }, { "epoch": 19.0, "grad_norm": 0.9282684326171875, "learning_rate": 4.05e-05, "loss": 0.0732, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.8942307692307693, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8857142857142857, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.8862973760932944, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8837209302325582, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.05610544607043266, "eval_overall_accuracy": 0.9817729908864954, "eval_overall_f1": 0.915129151291513, "eval_overall_precision": 0.9117647058823529, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2751, "eval_samples_per_second": 617.967, "eval_steps_per_second": 10.905, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.8386895656585693, "learning_rate": 4e-05, "loss": 0.072, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.8995215311004785, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8867924528301887, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8895184135977338, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8626373626373627, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9808429118773947, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9846153846153847, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.0517982579767704, "eval_overall_accuracy": 0.9820491576912456, "eval_overall_f1": 0.9210206561360874, "eval_overall_precision": 0.9066985645933014, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2732, "eval_samples_per_second": 622.235, "eval_steps_per_second": 10.981, "step": 1920 }, { "epoch": 21.0, "grad_norm": 0.8867515325546265, "learning_rate": 3.9500000000000005e-05, "loss": 0.069, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.8761904761904761, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8598130841121495, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.9026548672566371, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9107142857142857, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.05083949863910675, "eval_overall_accuracy": 0.9831538249102458, "eval_overall_f1": 0.9173859432799014, "eval_overall_precision": 0.916256157635468, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2728, "eval_samples_per_second": 623.197, "eval_steps_per_second": 10.998, "step": 2016 }, { "epoch": 22.0, "grad_norm": 1.4264206886291504, "learning_rate": 3.9000000000000006e-05, "loss": 0.0681, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.8909952606635072, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8703703703703703, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8851540616246498, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8494623655913979, "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.9808429118773947, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9846153846153847, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.050989117473363876, "eval_overall_accuracy": 0.9828776581054958, "eval_overall_f1": 0.916767189384801, "eval_overall_precision": 0.8962264150943396, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2754, "eval_samples_per_second": 617.372, "eval_steps_per_second": 10.895, "step": 2112 }, { "epoch": 23.0, "grad_norm": 1.5350762605667114, "learning_rate": 3.85e-05, "loss": 0.068, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.8909952606635072, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8703703703703703, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.9011627906976746, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8959537572254336, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9808429118773947, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9846153846153847, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04872269183397293, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9240196078431373, "eval_overall_precision": 0.9172749391727494, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2744, "eval_samples_per_second": 619.542, "eval_steps_per_second": 10.933, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.9508764147758484, "learning_rate": 3.8e-05, "loss": 0.0646, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.8755760368663594, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8333333333333334, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8862275449101796, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9079754601226994, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9581749049429659, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9545454545454546, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.05020871385931969, "eval_overall_accuracy": 0.9820491576912456, "eval_overall_f1": 0.9066339066339066, "eval_overall_precision": 0.902200488997555, "eval_overall_recall": 0.9111111111111111, "eval_runtime": 0.271, "eval_samples_per_second": 627.4, "eval_steps_per_second": 11.072, "step": 2304 }, { "epoch": 25.0, "grad_norm": 1.1497703790664673, "learning_rate": 3.7500000000000003e-05, "loss": 0.0641, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.8773584905660378, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8532110091743119, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.8953488372093024, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8901734104046243, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.047795675694942474, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9144254278728606, "eval_overall_precision": 0.9055690072639225, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2834, "eval_samples_per_second": 599.762, "eval_steps_per_second": 10.584, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.8473774790763855, "learning_rate": 3.7e-05, "loss": 0.0632, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.8888888888888888, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8846153846153846, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.9008498583569404, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8736263736263736, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04621800407767296, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9197080291970804, "eval_overall_precision": 0.9064748201438849, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2754, "eval_samples_per_second": 617.248, "eval_steps_per_second": 10.893, "step": 2496 }, { "epoch": 27.0, "grad_norm": 1.8567970991134644, "learning_rate": 3.65e-05, "loss": 0.0591, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.8899521531100479, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8773584905660378, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.8985507246376812, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8908045977011494, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044693056493997574, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9191176470588235, "eval_overall_precision": 0.9124087591240876, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2767, "eval_samples_per_second": 614.368, "eval_steps_per_second": 10.842, "step": 2592 }, { "epoch": 28.0, "grad_norm": 0.6628684997558594, "learning_rate": 3.6e-05, "loss": 0.0586, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.8942307692307693, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8857142857142857, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.8927536231884058, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8850574712643678, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04484202340245247, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9177914110429448, "eval_overall_precision": 0.9121951219512195, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2726, "eval_samples_per_second": 623.554, "eval_steps_per_second": 11.004, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.7217922806739807, "learning_rate": 3.55e-05, "loss": 0.0558, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.911764705882353, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9207920792079208, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.8939828080229225, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8764044943820225, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044037312269210815, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9215686274509803, "eval_overall_precision": 0.9148418491484185, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2741, "eval_samples_per_second": 620.158, "eval_steps_per_second": 10.944, "step": 2784 }, { "epoch": 30.0, "grad_norm": 1.4499776363372803, "learning_rate": 3.5e-05, "loss": 0.0568, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.8985507246376813, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8942307692307693, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.9005847953216374, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9005847953216374, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04531291872262955, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9223181257706535, "eval_overall_precision": 0.9211822660098522, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2743, "eval_samples_per_second": 619.82, "eval_steps_per_second": 10.938, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.696540355682373, "learning_rate": 3.45e-05, "loss": 0.0547, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.8995215311004785, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8867924528301887, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8914285714285715, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8715083798882681, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043637365102767944, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9183922046285019, "eval_overall_precision": 0.90625, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2736, "eval_samples_per_second": 621.339, "eval_steps_per_second": 10.965, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.7981347441673279, "learning_rate": 3.4000000000000007e-05, "loss": 0.0544, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.9004739336492891, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8796296296296297, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.890207715133531, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9036144578313253, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04337773472070694, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9185185185185185, "eval_overall_precision": 0.9185185185185185, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2747, "eval_samples_per_second": 618.782, "eval_steps_per_second": 10.92, "step": 3072 }, { "epoch": 33.0, "grad_norm": 1.1293216943740845, "learning_rate": 3.35e-05, "loss": 0.0509, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.8826291079812206, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8545454545454545, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8882175226586103, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.91875, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0449991337954998, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.913151364764268, "eval_overall_precision": 0.9177057356608479, "eval_overall_recall": 0.908641975308642, "eval_runtime": 0.2748, "eval_samples_per_second": 618.574, "eval_steps_per_second": 10.916, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.5707913041114807, "learning_rate": 3.3e-05, "loss": 0.05, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.8888888888888888, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8846153846153846, "eval_LOCATION_recall": 0.8932038834951457, "eval_ORGANIZATION_f1": 0.9058823529411764, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9112426035502958, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9808429118773947, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9846153846153847, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04190775752067566, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9257425742574257, "eval_overall_precision": 0.9280397022332506, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2732, "eval_samples_per_second": 622.147, "eval_steps_per_second": 10.979, "step": 3264 }, { "epoch": 35.0, "grad_norm": 0.952944278717041, "learning_rate": 3.2500000000000004e-05, "loss": 0.0496, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.8837209302325582, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8482142857142857, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8941176470588235, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8994082840236687, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04267964884638786, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9155446756425949, "eval_overall_precision": 0.9077669902912622, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2746, "eval_samples_per_second": 619.001, "eval_steps_per_second": 10.924, "step": 3360 }, { "epoch": 36.0, "grad_norm": 1.0468652248382568, "learning_rate": 3.2000000000000005e-05, "loss": 0.0504, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.8785046728971964, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8468468468468469, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8948948948948949, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9197530864197531, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04330718144774437, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9147095179233622, "eval_overall_precision": 0.9158415841584159, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.275, "eval_samples_per_second": 618.138, "eval_steps_per_second": 10.908, "step": 3456 }, { "epoch": 37.0, "grad_norm": 1.458516001701355, "learning_rate": 3.15e-05, "loss": 0.0485, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.9082125603864736, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9038461538461539, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8939828080229225, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8764044943820225, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.042913712561130524, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9217603911980441, "eval_overall_precision": 0.9128329297820823, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2758, "eval_samples_per_second": 616.35, "eval_steps_per_second": 10.877, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.37146496772766113, "learning_rate": 3.1e-05, "loss": 0.0466, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.8857142857142858, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8691588785046729, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.8908554572271385, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8988095238095238, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04390954226255417, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9149198520345252, "eval_overall_precision": 0.9137931034482759, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2748, "eval_samples_per_second": 618.74, "eval_steps_per_second": 10.919, "step": 3648 }, { "epoch": 39.0, "grad_norm": 1.1566592454910278, "learning_rate": 3.05e-05, "loss": 0.0464, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.8971962616822431, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8648648648648649, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8961424332344213, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9096385542168675, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.042460016906261444, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.920049200492005, "eval_overall_precision": 0.9166666666666666, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2779, "eval_samples_per_second": 611.838, "eval_steps_per_second": 10.797, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.4882775545120239, "learning_rate": 3e-05, "loss": 0.0457, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.903846153846154, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8952380952380953, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8914956011730205, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8941176470588236, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04286734014749527, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9198520345252774, "eval_overall_precision": 0.9187192118226601, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2771, "eval_samples_per_second": 613.556, "eval_steps_per_second": 10.827, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.6915533542633057, "learning_rate": 2.95e-05, "loss": 0.0454, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9029126213592235, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9029126213592233, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.8939828080229225, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8764044943820225, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04125836864113808, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9204406364749083, "eval_overall_precision": 0.912621359223301, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2754, "eval_samples_per_second": 617.379, "eval_steps_per_second": 10.895, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.8797712326049805, "learning_rate": 2.9e-05, "loss": 0.0446, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9090909090909091, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8962264150943396, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8914956011730205, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8941176470588236, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04334442317485809, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9211822660098522, "eval_overall_precision": 0.918918918918919, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2779, "eval_samples_per_second": 611.799, "eval_steps_per_second": 10.796, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.4275752902030945, "learning_rate": 2.8499999999999998e-05, "loss": 0.0461, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.892018779342723, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8636363636363636, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8973607038123168, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04322684183716774, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9191176470588235, "eval_overall_precision": 0.9124087591240876, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2731, "eval_samples_per_second": 622.429, "eval_steps_per_second": 10.984, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.738633930683136, "learning_rate": 2.8000000000000003e-05, "loss": 0.0445, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.8930232558139534, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8764705882352941, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8816568047337278, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0451325960457325, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9106487148102815, "eval_overall_precision": 0.9029126213592233, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2782, "eval_samples_per_second": 610.964, "eval_steps_per_second": 10.782, "step": 4224 }, { "epoch": 45.0, "grad_norm": 1.2641332149505615, "learning_rate": 2.7500000000000004e-05, "loss": 0.0439, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.9004739336492891, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8796296296296297, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8961424332344213, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9096385542168675, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04270249605178833, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9209876543209876, "eval_overall_precision": 0.9209876543209876, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.274, "eval_samples_per_second": 620.372, "eval_steps_per_second": 10.948, "step": 4320 }, { "epoch": 46.0, "grad_norm": 0.8629418015480042, "learning_rate": 2.7000000000000002e-05, "loss": 0.0436, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.8807339449541284, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8347826086956521, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.888888888888889, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9135802469135802, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04500781372189522, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9126691266912669, "eval_overall_precision": 0.9093137254901961, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2794, "eval_samples_per_second": 608.347, "eval_steps_per_second": 10.736, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.4795588254928589, "learning_rate": 2.6500000000000004e-05, "loss": 0.0436, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.9029126213592235, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9029126213592233, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.893371757925072, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8806818181818182, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04104533791542053, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9202453987730062, "eval_overall_precision": 0.9146341463414634, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2745, "eval_samples_per_second": 619.245, "eval_steps_per_second": 10.928, "step": 4512 }, { "epoch": 48.0, "grad_norm": 0.480375200510025, "learning_rate": 2.6000000000000002e-05, "loss": 0.0418, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.8807339449541284, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8347826086956521, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8948948948948949, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9197530864197531, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04590339586138725, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.915129151291513, "eval_overall_precision": 0.9117647058823529, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.275, "eval_samples_per_second": 618.199, "eval_steps_per_second": 10.909, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.09010949730873108, "learning_rate": 2.5500000000000003e-05, "loss": 0.0393, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.8807339449541284, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8347826086956521, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8835820895522388, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9024390243902439, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043848246335983276, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9104294478527607, "eval_overall_precision": 0.9048780487804878, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2732, "eval_samples_per_second": 622.256, "eval_steps_per_second": 10.981, "step": 4704 }, { "epoch": 50.0, "grad_norm": 1.1188256740570068, "learning_rate": 2.5e-05, "loss": 0.0403, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.8796296296296297, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8407079646017699, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8783382789317508, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.891566265060241, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04652123153209686, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9079754601226994, "eval_overall_precision": 0.9024390243902439, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2762, "eval_samples_per_second": 615.585, "eval_steps_per_second": 10.863, "step": 4800 }, { "epoch": 51.0, "grad_norm": 1.2530676126480103, "learning_rate": 2.45e-05, "loss": 0.0397, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.888888888888889, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8495575221238938, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.891566265060241, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9192546583850931, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04545534402132034, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9160493827160494, "eval_overall_precision": 0.9160493827160494, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2749, "eval_samples_per_second": 618.391, "eval_steps_per_second": 10.913, "step": 4896 }, { "epoch": 52.0, "grad_norm": 0.9018911719322205, "learning_rate": 2.4e-05, "loss": 0.0398, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.903846153846154, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8952380952380953, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8901734104046243, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.88, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.042693160474300385, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9191176470588235, "eval_overall_precision": 0.9124087591240876, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2751, "eval_samples_per_second": 617.987, "eval_steps_per_second": 10.906, "step": 4992 }, { "epoch": 53.0, "grad_norm": 0.6297587156295776, "learning_rate": 2.35e-05, "loss": 0.0394, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.8981481481481481, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8584070796460177, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8862275449101796, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9079754601226994, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0448770709335804, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.916256157635468, "eval_overall_precision": 0.914004914004914, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2731, "eval_samples_per_second": 622.408, "eval_steps_per_second": 10.984, "step": 5088 }, { "epoch": 54.0, "grad_norm": 1.4009506702423096, "learning_rate": 2.3000000000000003e-05, "loss": 0.0378, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.8952380952380954, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8785046728971962, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8843930635838151, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8742857142857143, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04195466637611389, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9144254278728606, "eval_overall_precision": 0.9055690072639225, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2725, "eval_samples_per_second": 623.761, "eval_steps_per_second": 11.008, "step": 5184 }, { "epoch": 55.0, "grad_norm": 2.148137331008911, "learning_rate": 2.25e-05, "loss": 0.0387, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8816568047337278, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8922155688622755, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044585153460502625, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.915129151291513, "eval_overall_precision": 0.9117647058823529, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2734, "eval_samples_per_second": 621.906, "eval_steps_per_second": 10.975, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.7773046493530273, "learning_rate": 2.2000000000000003e-05, "loss": 0.0369, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.8878504672897196, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8558558558558559, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8739002932551321, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8764705882352941, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043501876294612885, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.908200734394125, "eval_overall_precision": 0.9004854368932039, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2737, "eval_samples_per_second": 621.061, "eval_steps_per_second": 10.96, "step": 5376 }, { "epoch": 57.0, "grad_norm": 1.5469814538955688, "learning_rate": 2.15e-05, "loss": 0.0383, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9023255813953488, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8660714285714286, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8849557522123894, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8928571428571429, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.043298330157995224, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9166666666666667, "eval_overall_precision": 0.9099756690997567, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2721, "eval_samples_per_second": 624.765, "eval_steps_per_second": 11.025, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.7509494423866272, "learning_rate": 2.1e-05, "loss": 0.0368, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9056603773584906, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8807339449541285, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8888888888888888, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8888888888888888, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04441133141517639, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9191176470588235, "eval_overall_precision": 0.9124087591240876, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2731, "eval_samples_per_second": 622.573, "eval_steps_per_second": 10.987, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.4354134500026703, "learning_rate": 2.05e-05, "loss": 0.0349, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.8867924528301887, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8623853211009175, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8705882352941177, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8757396449704142, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04510478302836418, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9066339066339066, "eval_overall_precision": 0.902200488997555, "eval_overall_recall": 0.9111111111111111, "eval_runtime": 0.2733, "eval_samples_per_second": 621.93, "eval_steps_per_second": 10.975, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.24447734653949738, "learning_rate": 2e-05, "loss": 0.0357, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.8962264150943398, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8715596330275229, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8724035608308606, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8855421686746988, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045494548976421356, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9099876695437731, "eval_overall_precision": 0.9088669950738916, "eval_overall_recall": 0.9111111111111111, "eval_runtime": 0.2756, "eval_samples_per_second": 616.844, "eval_steps_per_second": 10.885, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.40027135610580444, "learning_rate": 1.9500000000000003e-05, "loss": 0.0365, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9074074074074074, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8672566371681416, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045581597834825516, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.916256157635468, "eval_overall_precision": 0.914004914004914, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2724, "eval_samples_per_second": 624.065, "eval_steps_per_second": 11.013, "step": 5856 }, { "epoch": 62.0, "grad_norm": 1.5048304796218872, "learning_rate": 1.9e-05, "loss": 0.0365, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8461538461538461, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.8862275449101796, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9079754601226994, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04551342874765396, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9166666666666667, "eval_overall_precision": 0.9099756690997567, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2721, "eval_samples_per_second": 624.83, "eval_steps_per_second": 11.026, "step": 5952 }, { "epoch": 63.0, "grad_norm": 1.6805459260940552, "learning_rate": 1.85e-05, "loss": 0.034, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.8981481481481481, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8584070796460177, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8764705882352941, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8816568047337278, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04244678094983101, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9119804400977994, "eval_overall_precision": 0.9031476997578692, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2735, "eval_samples_per_second": 621.581, "eval_steps_per_second": 10.969, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.22939109802246094, "learning_rate": 1.8e-05, "loss": 0.0347, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8828828828828829, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9074074074074074, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.046498287469148636, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.916256157635468, "eval_overall_precision": 0.914004914004914, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2752, "eval_samples_per_second": 617.742, "eval_steps_per_second": 10.901, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.5688343644142151, "learning_rate": 1.75e-05, "loss": 0.0342, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8895522388059701, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9085365853658537, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04455501213669777, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9177914110429448, "eval_overall_precision": 0.9121951219512195, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2714, "eval_samples_per_second": 626.396, "eval_steps_per_second": 11.054, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.7811012268066406, "learning_rate": 1.7000000000000003e-05, "loss": 0.034, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9124423963133641, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.868421052631579, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.8862275449101796, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9079754601226994, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04501819983124733, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.920049200492005, "eval_overall_precision": 0.9166666666666666, "eval_overall_recall": 0.9234567901234568, "eval_runtime": 0.2711, "eval_samples_per_second": 626.972, "eval_steps_per_second": 11.064, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.5282329320907593, "learning_rate": 1.65e-05, "loss": 0.0338, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8835820895522388, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9024390243902439, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04584520682692528, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9164619164619164, "eval_overall_precision": 0.9119804400977995, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2786, "eval_samples_per_second": 610.088, "eval_steps_per_second": 10.766, "step": 6432 }, { "epoch": 68.0, "grad_norm": 1.0224095582962036, "learning_rate": 1.6000000000000003e-05, "loss": 0.0368, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.8930232558139534, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.875, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8909090909090909, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9808429118773947, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9846153846153847, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.046090107411146164, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9137931034482758, "eval_overall_precision": 0.9115479115479116, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2751, "eval_samples_per_second": 618.005, "eval_steps_per_second": 10.906, "step": 6528 }, { "epoch": 69.0, "grad_norm": 1.2957454919815063, "learning_rate": 1.55e-05, "loss": 0.036, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.8930232558139534, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.875, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8909090909090909, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045359622687101364, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9090909090909091, "eval_overall_precision": 0.9046454767726161, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2752, "eval_samples_per_second": 617.691, "eval_steps_per_second": 10.9, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.5331136584281921, "learning_rate": 1.5e-05, "loss": 0.0318, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.8981481481481481, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8584070796460177, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8862275449101796, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9079754601226994, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.047888755798339844, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.916256157635468, "eval_overall_precision": 0.914004914004914, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2721, "eval_samples_per_second": 624.705, "eval_steps_per_second": 11.024, "step": 6720 }, { "epoch": 71.0, "grad_norm": 0.6876541376113892, "learning_rate": 1.45e-05, "loss": 0.0327, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04490681737661362, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9128834355828221, "eval_overall_precision": 0.9073170731707317, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2733, "eval_samples_per_second": 621.932, "eval_steps_per_second": 10.975, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.654994785785675, "learning_rate": 1.4000000000000001e-05, "loss": 0.0328, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.874251497005988, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8957055214723927, "eval_ORGANIZATION_recall": 0.8538011695906432, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04899928718805313, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9115479115479117, "eval_overall_precision": 0.9070904645476773, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2714, "eval_samples_per_second": 626.475, "eval_steps_per_second": 11.055, "step": 6912 }, { "epoch": 73.0, "grad_norm": 0.8341560363769531, "learning_rate": 1.3500000000000001e-05, "loss": 0.0325, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9074074074074074, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8672566371681416, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8862275449101796, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9079754601226994, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04653839394450188, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9175891758917589, "eval_overall_precision": 0.9142156862745098, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2731, "eval_samples_per_second": 622.455, "eval_steps_per_second": 10.985, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.6812028288841248, "learning_rate": 1.3000000000000001e-05, "loss": 0.0337, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04794888198375702, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.9128834355828221, "eval_overall_precision": 0.9073170731707317, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2744, "eval_samples_per_second": 619.561, "eval_steps_per_second": 10.933, "step": 7104 }, { "epoch": 75.0, "grad_norm": 1.309098482131958, "learning_rate": 1.25e-05, "loss": 0.0329, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8776119402985074, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8963414634146342, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04596688970923424, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9135802469135802, "eval_overall_precision": 0.9135802469135802, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2745, "eval_samples_per_second": 619.334, "eval_steps_per_second": 10.929, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.5950937867164612, "learning_rate": 1.2e-05, "loss": 0.0308, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8855421686746987, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9130434782608695, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04719982296228409, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.916256157635468, "eval_overall_precision": 0.914004914004914, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2753, "eval_samples_per_second": 617.519, "eval_steps_per_second": 10.897, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.4345172941684723, "learning_rate": 1.1500000000000002e-05, "loss": 0.0319, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8828828828828829, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9074074074074074, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04743256792426109, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.914004914004914, "eval_overall_precision": 0.9095354523227384, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2723, "eval_samples_per_second": 624.315, "eval_steps_per_second": 11.017, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.5274398326873779, "learning_rate": 1.1000000000000001e-05, "loss": 0.0317, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.888888888888889, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8495575221238938, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8776119402985074, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8963414634146342, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9770992366412213, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9770992366412213, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04544433578848839, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.9126691266912669, "eval_overall_precision": 0.9093137254901961, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2743, "eval_samples_per_second": 619.68, "eval_steps_per_second": 10.936, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.9408136010169983, "learning_rate": 1.05e-05, "loss": 0.0305, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.888888888888889, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8495575221238938, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.875, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8909090909090909, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04592936858534813, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.9079754601226994, "eval_overall_precision": 0.9024390243902439, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2724, "eval_samples_per_second": 624.192, "eval_steps_per_second": 11.015, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.8380435109138489, "learning_rate": 1e-05, "loss": 0.0302, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.888888888888889, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8495575221238938, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8724035608308606, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8855421686746988, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0458238460123539, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.9068627450980392, "eval_overall_precision": 0.9002433090024331, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2717, "eval_samples_per_second": 625.672, "eval_steps_per_second": 11.041, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.48563507199287415, "learning_rate": 9.5e-06, "loss": 0.0315, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.888888888888889, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8495575221238938, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8724035608308606, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8855421686746988, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045294273644685745, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.9068627450980392, "eval_overall_precision": 0.9002433090024331, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2726, "eval_samples_per_second": 623.621, "eval_steps_per_second": 11.005, "step": 7776 }, { "epoch": 82.0, "grad_norm": 1.0620760917663574, "learning_rate": 9e-06, "loss": 0.031, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.8940092165898617, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8508771929824561, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8776119402985074, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8963414634146342, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044751718640327454, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9104294478527607, "eval_overall_precision": 0.9048780487804878, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2742, "eval_samples_per_second": 619.93, "eval_steps_per_second": 10.94, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.4180748462677002, "learning_rate": 8.500000000000002e-06, "loss": 0.0322, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8828828828828829, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9074074074074074, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04457832872867584, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.916256157635468, "eval_overall_precision": 0.914004914004914, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2709, "eval_samples_per_second": 627.557, "eval_steps_per_second": 11.075, "step": 7968 }, { "epoch": 84.0, "grad_norm": 0.31515267491340637, "learning_rate": 8.000000000000001e-06, "loss": 0.0292, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.888888888888889, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8495575221238938, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8724035608308606, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8855421686746988, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04457494989037514, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.9068627450980392, "eval_overall_precision": 0.9002433090024331, "eval_overall_recall": 0.9135802469135802, "eval_runtime": 0.2713, "eval_samples_per_second": 626.576, "eval_steps_per_second": 11.057, "step": 8064 }, { "epoch": 85.0, "grad_norm": 0.8950293660163879, "learning_rate": 7.5e-06, "loss": 0.0302, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8835820895522388, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9024390243902439, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044330187141895294, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9142156862745098, "eval_overall_precision": 0.9075425790754258, "eval_overall_recall": 0.9209876543209876, "eval_runtime": 0.2717, "eval_samples_per_second": 625.76, "eval_steps_per_second": 11.043, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.6794619560241699, "learning_rate": 7.000000000000001e-06, "loss": 0.0298, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8828828828828829, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9074074074074074, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9694656488549618, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9694656488549618, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04548780620098114, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.915129151291513, "eval_overall_precision": 0.9117647058823529, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.274, "eval_samples_per_second": 620.455, "eval_steps_per_second": 10.949, "step": 8256 }, { "epoch": 87.0, "grad_norm": 1.192101001739502, "learning_rate": 6.5000000000000004e-06, "loss": 0.0311, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04509204253554344, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9128834355828221, "eval_overall_precision": 0.9073170731707317, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2719, "eval_samples_per_second": 625.172, "eval_steps_per_second": 11.032, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.8452901840209961, "learning_rate": 6e-06, "loss": 0.0302, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04407266154885292, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9128834355828221, "eval_overall_precision": 0.9073170731707317, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2725, "eval_samples_per_second": 623.962, "eval_steps_per_second": 11.011, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.26842784881591797, "learning_rate": 5.500000000000001e-06, "loss": 0.0295, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044793352484703064, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9128834355828221, "eval_overall_precision": 0.9073170731707317, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.271, "eval_samples_per_second": 627.372, "eval_steps_per_second": 11.071, "step": 8544 }, { "epoch": 90.0, "grad_norm": 1.0366742610931396, "learning_rate": 5e-06, "loss": 0.0299, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.8940092165898617, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8508771929824561, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8776119402985074, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8963414634146342, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04475311189889908, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9104294478527607, "eval_overall_precision": 0.9048780487804878, "eval_overall_recall": 0.9160493827160494, "eval_runtime": 0.2713, "eval_samples_per_second": 626.541, "eval_steps_per_second": 11.057, "step": 8640 }, { "epoch": 91.0, "grad_norm": 1.2005571126937866, "learning_rate": 4.5e-06, "loss": 0.0301, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8776119402985074, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8963414634146342, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04551587998867035, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9128834355828221, "eval_overall_precision": 0.9073170731707317, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2727, "eval_samples_per_second": 623.429, "eval_steps_per_second": 11.002, "step": 8736 }, { "epoch": 92.0, "grad_norm": 1.420346736907959, "learning_rate": 4.000000000000001e-06, "loss": 0.03, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.046317245811223984, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.914004914004914, "eval_overall_precision": 0.9095354523227384, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2712, "eval_samples_per_second": 626.946, "eval_steps_per_second": 11.064, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.5579015612602234, "learning_rate": 3.5000000000000004e-06, "loss": 0.0275, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8776119402985074, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8963414634146342, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045393262058496475, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9128834355828221, "eval_overall_precision": 0.9073170731707317, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2716, "eval_samples_per_second": 625.902, "eval_steps_per_second": 11.045, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.30002880096435547, "learning_rate": 3e-06, "loss": 0.0311, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0462392196059227, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.914004914004914, "eval_overall_precision": 0.9095354523227384, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2728, "eval_samples_per_second": 623.26, "eval_steps_per_second": 10.999, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.39586618542671204, "learning_rate": 2.5e-06, "loss": 0.0285, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04581024497747421, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.914004914004914, "eval_overall_precision": 0.9095354523227384, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2745, "eval_samples_per_second": 619.382, "eval_steps_per_second": 10.93, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.183471217751503, "learning_rate": 2.0000000000000003e-06, "loss": 0.0304, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.045909781008958817, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.914004914004914, "eval_overall_precision": 0.9095354523227384, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2706, "eval_samples_per_second": 628.124, "eval_steps_per_second": 11.085, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.34875068068504333, "learning_rate": 1.5e-06, "loss": 0.0285, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04600595310330391, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9128834355828221, "eval_overall_precision": 0.9073170731707317, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2708, "eval_samples_per_second": 627.807, "eval_steps_per_second": 11.079, "step": 9312 }, { "epoch": 98.0, "grad_norm": 1.6792984008789062, "learning_rate": 1.0000000000000002e-06, "loss": 0.0299, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.8990825688073394, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8521739130434782, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04618709161877632, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9128834355828221, "eval_overall_precision": 0.9073170731707317, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2707, "eval_samples_per_second": 627.903, "eval_steps_per_second": 11.081, "step": 9408 }, { "epoch": 99.0, "grad_norm": 1.6316431760787964, "learning_rate": 5.000000000000001e-07, "loss": 0.0296, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04610595107078552, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.914004914004914, "eval_overall_precision": 0.9095354523227384, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.2718, "eval_samples_per_second": 625.363, "eval_steps_per_second": 11.036, "step": 9504 }, { "epoch": 100.0, "grad_norm": 0.07908789813518524, "learning_rate": 0.0, "loss": 0.0279, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.903225806451613, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8596491228070176, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8802395209580838, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.901840490797546, "eval_ORGANIZATION_recall": 0.8596491228070176, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04597840458154678, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.914004914004914, "eval_overall_precision": 0.9095354523227384, "eval_overall_recall": 0.9185185185185185, "eval_runtime": 0.272, "eval_samples_per_second": 624.998, "eval_steps_per_second": 11.029, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3903436930778652.0, "train_loss": 0.07595260620117188, "train_runtime": 527.2222, "train_samples_per_second": 290.39, "train_steps_per_second": 18.209 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3903436930778652.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }