{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.4515146017074585, "learning_rate": 4.9500000000000004e-05, "loss": 1.0545, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.0, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.0, "eval_ORGANIZATION_recall": 0.0, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.6621571183204651, "eval_overall_accuracy": 0.8394180620367828, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, "eval_runtime": 0.2785, "eval_samples_per_second": 610.49, "eval_steps_per_second": 10.773, "step": 96 }, { "epoch": 2.0, "grad_norm": 2.103937864303589, "learning_rate": 4.9e-05, "loss": 0.64, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.02469135802469136, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.5, "eval_ORGANIZATION_recall": 0.012658227848101266, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.5205754637718201, "eval_overall_accuracy": 0.8399670601152895, "eval_overall_f1": 0.009900990099009901, "eval_overall_precision": 0.3333333333333333, "eval_overall_recall": 0.005025125628140704, "eval_runtime": 0.2762, "eval_samples_per_second": 615.56, "eval_steps_per_second": 10.863, "step": 192 }, { "epoch": 3.0, "grad_norm": 0.8918240666389465, "learning_rate": 4.85e-05, "loss": 0.503, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.015625, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.08333333333333333, "eval_LOCATION_recall": 0.008620689655172414, "eval_ORGANIZATION_f1": 0.2436974789915966, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.3625, "eval_ORGANIZATION_recall": 0.18354430379746836, "eval_PERSON_f1": 0.3214285714285714, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.36, "eval_PERSON_recall": 0.2903225806451613, "eval_loss": 0.37283432483673096, "eval_overall_accuracy": 0.8718089486686796, "eval_overall_f1": 0.22372881355932206, "eval_overall_precision": 0.34375, "eval_overall_recall": 0.1658291457286432, "eval_runtime": 0.2786, "eval_samples_per_second": 610.242, "eval_steps_per_second": 10.769, "step": 288 }, { "epoch": 4.0, "grad_norm": 1.4870269298553467, "learning_rate": 4.8e-05, "loss": 0.3537, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.3125, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.39473684210526316, "eval_LOCATION_recall": 0.25862068965517243, "eval_ORGANIZATION_f1": 0.5120481927710843, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.4885057471264368, "eval_ORGANIZATION_recall": 0.5379746835443038, "eval_PERSON_f1": 0.627177700348432, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.5521472392638037, "eval_PERSON_recall": 0.7258064516129032, "eval_loss": 0.2518472671508789, "eval_overall_accuracy": 0.9198462805380181, "eval_overall_f1": 0.5055487053020961, "eval_overall_precision": 0.4963680387409201, "eval_overall_recall": 0.5150753768844221, "eval_runtime": 0.2923, "eval_samples_per_second": 581.551, "eval_steps_per_second": 10.263, "step": 384 }, { "epoch": 5.0, "grad_norm": 1.199317455291748, "learning_rate": 4.75e-05, "loss": 0.2513, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.5892857142857143, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.6111111111111112, "eval_LOCATION_recall": 0.5689655172413793, "eval_ORGANIZATION_f1": 0.6590909090909092, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.5979381443298969, "eval_ORGANIZATION_recall": 0.7341772151898734, "eval_PERSON_f1": 0.8571428571428572, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.8028169014084507, "eval_PERSON_recall": 0.9193548387096774, "eval_loss": 0.1811506599187851, "eval_overall_accuracy": 0.9497666758166347, "eval_overall_f1": 0.7030878859857482, "eval_overall_precision": 0.6666666666666666, "eval_overall_recall": 0.7437185929648241, "eval_runtime": 0.2823, "eval_samples_per_second": 602.152, "eval_steps_per_second": 10.626, "step": 480 }, { "epoch": 6.0, "grad_norm": 0.7906554341316223, "learning_rate": 4.7e-05, "loss": 0.1948, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.759493670886076, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.743801652892562, "eval_LOCATION_recall": 0.7758620689655172, "eval_ORGANIZATION_f1": 0.7659574468085106, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.7368421052631579, "eval_ORGANIZATION_recall": 0.7974683544303798, "eval_PERSON_f1": 0.9348659003831418, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.8905109489051095, "eval_PERSON_recall": 0.9838709677419355, "eval_loss": 0.13590684533119202, "eval_overall_accuracy": 0.9656876200933296, "eval_overall_f1": 0.8174123337363967, "eval_overall_precision": 0.7878787878787878, "eval_overall_recall": 0.8492462311557789, "eval_runtime": 0.2829, "eval_samples_per_second": 600.915, "eval_steps_per_second": 10.604, "step": 576 }, { "epoch": 7.0, "grad_norm": 1.447137475013733, "learning_rate": 4.6500000000000005e-05, "loss": 0.1623, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.8050847457627119, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.7916666666666666, "eval_LOCATION_recall": 0.8189655172413793, "eval_ORGANIZATION_f1": 0.7852760736196319, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.7619047619047619, "eval_ORGANIZATION_recall": 0.810126582278481, "eval_PERSON_f1": 0.9457364341085271, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.9104477611940298, "eval_PERSON_recall": 0.9838709677419355, "eval_loss": 0.11087985336780548, "eval_overall_accuracy": 0.9700796047213834, "eval_overall_f1": 0.8414634146341463, "eval_overall_precision": 0.8175355450236966, "eval_overall_recall": 0.8668341708542714, "eval_runtime": 0.2806, "eval_samples_per_second": 605.903, "eval_steps_per_second": 10.692, "step": 672 }, { "epoch": 8.0, "grad_norm": 1.1691491603851318, "learning_rate": 4.600000000000001e-05, "loss": 0.1397, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.8220338983050848, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8083333333333333, "eval_LOCATION_recall": 0.8362068965517241, "eval_ORGANIZATION_f1": 0.8220858895705523, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.7976190476190477, "eval_ORGANIZATION_recall": 0.8481012658227848, "eval_PERSON_f1": 0.9647058823529411, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.9389312977099237, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.09539666771888733, "eval_overall_accuracy": 0.9739225912709305, "eval_overall_f1": 0.8665850673194615, "eval_overall_precision": 0.8448687350835322, "eval_overall_recall": 0.8894472361809045, "eval_runtime": 0.2806, "eval_samples_per_second": 605.754, "eval_steps_per_second": 10.69, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.8122084140777588, "learning_rate": 4.55e-05, "loss": 0.1266, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.8559670781893004, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8188976377952756, "eval_LOCATION_recall": 0.896551724137931, "eval_ORGANIZATION_f1": 0.8404907975460123, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8154761904761905, "eval_ORGANIZATION_recall": 0.8670886075949367, "eval_PERSON_f1": 0.9609375, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.9318181818181818, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.0876610055565834, "eval_overall_accuracy": 0.9761185835849574, "eval_overall_f1": 0.8824242424242424, "eval_overall_precision": 0.8524590163934426, "eval_overall_recall": 0.914572864321608, "eval_runtime": 0.2798, "eval_samples_per_second": 607.653, "eval_steps_per_second": 10.723, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.770068883895874, "learning_rate": 4.5e-05, "loss": 0.1157, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.8823529411764706, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.860655737704918, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.8624999999999999, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8518518518518519, "eval_ORGANIZATION_recall": 0.8734177215189873, "eval_PERSON_f1": 0.9761904761904763, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.9609375, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.07306713610887527, "eval_overall_accuracy": 0.9799615701345046, "eval_overall_f1": 0.9037037037037038, "eval_overall_precision": 0.8883495145631068, "eval_overall_recall": 0.9195979899497487, "eval_runtime": 0.2933, "eval_samples_per_second": 579.604, "eval_steps_per_second": 10.228, "step": 960 }, { "epoch": 11.0, "grad_norm": 0.5027669072151184, "learning_rate": 4.4500000000000004e-05, "loss": 0.1111, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.8945147679324894, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8760330578512396, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.8792569659442724, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8606060606060606, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9800796812749005, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.06731364130973816, "eval_overall_accuracy": 0.9813340653307714, "eval_overall_f1": 0.9149198520345253, "eval_overall_precision": 0.8983050847457628, "eval_overall_recall": 0.9321608040201005, "eval_runtime": 0.2833, "eval_samples_per_second": 600.024, "eval_steps_per_second": 10.589, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.663327693939209, "learning_rate": 4.4000000000000006e-05, "loss": 0.1044, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.8945147679324894, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8760330578512396, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.8765432098765433, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8554216867469879, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9800796812749005, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.06354989856481552, "eval_overall_accuracy": 0.981059566291518, "eval_overall_f1": 0.9137931034482758, "eval_overall_precision": 0.8961352657004831, "eval_overall_recall": 0.9321608040201005, "eval_runtime": 0.282, "eval_samples_per_second": 602.932, "eval_steps_per_second": 10.64, "step": 1152 }, { "epoch": 13.0, "grad_norm": 1.0790586471557617, "learning_rate": 4.35e-05, "loss": 0.098, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.8974358974358974, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8898305084745762, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.8722741433021807, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8588957055214724, "eval_ORGANIZATION_recall": 0.8860759493670886, "eval_PERSON_f1": 0.9840000000000001, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.057803478091955185, "eval_overall_accuracy": 0.9816085643700247, "eval_overall_f1": 0.9142857142857144, "eval_overall_precision": 0.9041769041769042, "eval_overall_recall": 0.9246231155778895, "eval_runtime": 0.2823, "eval_samples_per_second": 602.275, "eval_steps_per_second": 10.628, "step": 1248 }, { "epoch": 14.0, "grad_norm": 1.317100167274475, "learning_rate": 4.3e-05, "loss": 0.0939, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.8898305084745763, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.8749999999999999, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8641975308641975, "eval_ORGANIZATION_recall": 0.8860759493670886, "eval_PERSON_f1": 0.9840000000000001, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.05590749531984329, "eval_overall_accuracy": 0.9818830634092781, "eval_overall_f1": 0.9131513647642681, "eval_overall_precision": 0.9019607843137255, "eval_overall_recall": 0.9246231155778895, "eval_runtime": 0.2821, "eval_samples_per_second": 602.62, "eval_steps_per_second": 10.634, "step": 1344 }, { "epoch": 15.0, "grad_norm": 0.728569507598877, "learning_rate": 4.25e-05, "loss": 0.091, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.8936170212765958, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.8685015290519877, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8402366863905325, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9800796812749005, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.05577402561903, "eval_overall_accuracy": 0.9816085643700247, "eval_overall_f1": 0.910209102091021, "eval_overall_precision": 0.891566265060241, "eval_overall_recall": 0.9296482412060302, "eval_runtime": 0.2813, "eval_samples_per_second": 604.331, "eval_steps_per_second": 10.665, "step": 1440 }, { "epoch": 16.0, "grad_norm": 1.4055036306381226, "learning_rate": 4.2e-05, "loss": 0.088, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.8898305084745763, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.8711656441717791, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8452380952380952, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.9534883720930233, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.05548899993300438, "eval_overall_accuracy": 0.981059566291518, "eval_overall_f1": 0.9079754601226994, "eval_overall_precision": 0.8872901678657075, "eval_overall_recall": 0.9296482412060302, "eval_runtime": 0.2808, "eval_samples_per_second": 605.486, "eval_steps_per_second": 10.685, "step": 1536 }, { "epoch": 17.0, "grad_norm": 0.5631800293922424, "learning_rate": 4.15e-05, "loss": 0.0857, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.8936170212765958, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8823529411764706, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.889589905362776, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8867924528301887, "eval_ORGANIZATION_recall": 0.8924050632911392, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.052291139960289, "eval_overall_accuracy": 0.9846280538018117, "eval_overall_f1": 0.9213483146067416, "eval_overall_precision": 0.9156327543424317, "eval_overall_recall": 0.9271356783919598, "eval_runtime": 0.2834, "eval_samples_per_second": 599.885, "eval_steps_per_second": 10.586, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.8045436143875122, "learning_rate": 4.1e-05, "loss": 0.0809, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.8860759493670886, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8677685950413223, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.8819875776397516, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8658536585365854, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.04978971555829048, "eval_overall_accuracy": 0.9832555586055449, "eval_overall_f1": 0.9158415841584159, "eval_overall_precision": 0.9024390243902439, "eval_overall_recall": 0.9296482412060302, "eval_runtime": 0.2824, "eval_samples_per_second": 602.083, "eval_steps_per_second": 10.625, "step": 1728 }, { "epoch": 19.0, "grad_norm": 0.8581104278564453, "learning_rate": 4.05e-05, "loss": 0.0773, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.8974358974358974, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8898305084745762, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.89375, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8827160493827161, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.048203177750110626, "eval_overall_accuracy": 0.9843535547625584, "eval_overall_f1": 0.9240348692403487, "eval_overall_precision": 0.9160493827160494, "eval_overall_recall": 0.9321608040201005, "eval_runtime": 0.281, "eval_samples_per_second": 604.968, "eval_steps_per_second": 10.676, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.8157017827033997, "learning_rate": 4e-05, "loss": 0.0765, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.8983050847457628, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8833333333333333, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.8834355828220858, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8571428571428571, "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9800796812749005, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.05213193967938423, "eval_overall_accuracy": 0.9821575624485315, "eval_overall_f1": 0.9175891758917589, "eval_overall_precision": 0.8987951807228916, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2817, "eval_samples_per_second": 603.469, "eval_steps_per_second": 10.649, "step": 1920 }, { "epoch": 21.0, "grad_norm": 1.5243802070617676, "learning_rate": 3.9500000000000005e-05, "loss": 0.0754, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.8898305084745763, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.8950617283950618, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8734939759036144, "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.0483800433576107, "eval_overall_accuracy": 0.984079055723305, "eval_overall_f1": 0.9221260815822002, "eval_overall_precision": 0.9075425790754258, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2805, "eval_samples_per_second": 606.104, "eval_steps_per_second": 10.696, "step": 2016 }, { "epoch": 22.0, "grad_norm": 1.5905965566635132, "learning_rate": 3.9000000000000006e-05, "loss": 0.072, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.8898305084745763, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.8792569659442724, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8606060606060606, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.046878568828105927, "eval_overall_accuracy": 0.9835300576447983, "eval_overall_f1": 0.9158415841584159, "eval_overall_precision": 0.9024390243902439, "eval_overall_recall": 0.9296482412060302, "eval_runtime": 0.2799, "eval_samples_per_second": 607.427, "eval_steps_per_second": 10.719, "step": 2112 }, { "epoch": 23.0, "grad_norm": 0.7514567375183105, "learning_rate": 3.85e-05, "loss": 0.0689, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.8974358974358974, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8898305084745762, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.9028213166144201, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8944099378881988, "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.044030942022800446, "eval_overall_accuracy": 0.9843535547625584, "eval_overall_f1": 0.9276807980049875, "eval_overall_precision": 0.9207920792079208, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2806, "eval_samples_per_second": 605.822, "eval_steps_per_second": 10.691, "step": 2208 }, { "epoch": 24.0, "grad_norm": 1.4454855918884277, "learning_rate": 3.8e-05, "loss": 0.0697, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.9012875536480686, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8974358974358975, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.8881789137380192, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.896774193548387, "eval_ORGANIZATION_recall": 0.879746835443038, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.04559732973575592, "eval_overall_accuracy": 0.9846280538018117, "eval_overall_f1": 0.9232704402515725, "eval_overall_precision": 0.924433249370277, "eval_overall_recall": 0.9221105527638191, "eval_runtime": 0.2825, "eval_samples_per_second": 601.735, "eval_steps_per_second": 10.619, "step": 2304 }, { "epoch": 25.0, "grad_norm": 1.2895970344543457, "learning_rate": 3.7500000000000003e-05, "loss": 0.0656, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8983050847457628, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.8867924528301886, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.88125, "eval_ORGANIZATION_recall": 0.8924050632911392, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.04359252005815506, "eval_overall_accuracy": 0.9846280538018117, "eval_overall_f1": 0.9238451935081149, "eval_overall_precision": 0.9181141439205955, "eval_overall_recall": 0.9296482412060302, "eval_runtime": 0.2817, "eval_samples_per_second": 603.476, "eval_steps_per_second": 10.65, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.6096397638320923, "learning_rate": 3.7e-05, "loss": 0.0658, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.9012875536480686, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8974358974358975, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.88125, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8703703703703703, "eval_ORGANIZATION_recall": 0.8924050632911392, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.0426979586482048, "eval_overall_accuracy": 0.984079055723305, "eval_overall_f1": 0.9201995012468827, "eval_overall_precision": 0.9133663366336634, "eval_overall_recall": 0.9271356783919598, "eval_runtime": 0.2821, "eval_samples_per_second": 602.528, "eval_steps_per_second": 10.633, "step": 2496 }, { "epoch": 27.0, "grad_norm": 1.2615808248519897, "learning_rate": 3.65e-05, "loss": 0.065, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.9051724137931035, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9051724137931034, "eval_LOCATION_recall": 0.9051724137931034, "eval_ORGANIZATION_f1": 0.897196261682243, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8834355828220859, "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.04214628413319588, "eval_overall_accuracy": 0.9854515509195718, "eval_overall_f1": 0.9276807980049875, "eval_overall_precision": 0.9207920792079208, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2802, "eval_samples_per_second": 606.749, "eval_steps_per_second": 10.707, "step": 2592 }, { "epoch": 28.0, "grad_norm": 1.0637108087539673, "learning_rate": 3.6e-05, "loss": 0.0613, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.8983050847457628, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8833333333333333, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.8965517241379312, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8881987577639752, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.041786737740039825, "eval_overall_accuracy": 0.9854515509195718, "eval_overall_f1": 0.9253731343283583, "eval_overall_precision": 0.916256157635468, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.281, "eval_samples_per_second": 604.884, "eval_steps_per_second": 10.674, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.4293781816959381, "learning_rate": 3.55e-05, "loss": 0.0591, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.9098712446351932, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.905982905982906, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.8965517241379312, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8881987577639752, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.039750631898641586, "eval_overall_accuracy": 0.9873730441943454, "eval_overall_f1": 0.9288389513108615, "eval_overall_precision": 0.9230769230769231, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2819, "eval_samples_per_second": 603.055, "eval_steps_per_second": 10.642, "step": 2784 }, { "epoch": 30.0, "grad_norm": 1.076916217803955, "learning_rate": 3.5e-05, "loss": 0.06, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.9098712446351932, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.905982905982906, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.9022082018927445, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.89937106918239, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03954719007015228, "eval_overall_accuracy": 0.9865495470765853, "eval_overall_f1": 0.9311639549436797, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2821, "eval_samples_per_second": 602.55, "eval_steps_per_second": 10.633, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.6545688509941101, "learning_rate": 3.45e-05, "loss": 0.0566, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8983050847457628, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.89375, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8827160493827161, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03855058550834656, "eval_overall_accuracy": 0.9862750480373319, "eval_overall_f1": 0.9265255292652553, "eval_overall_precision": 0.9185185185185185, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.279, "eval_samples_per_second": 609.398, "eval_steps_per_second": 10.754, "step": 2976 }, { "epoch": 32.0, "grad_norm": 1.2271991968154907, "learning_rate": 3.4000000000000007e-05, "loss": 0.0566, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.8927038626609441, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.896551724137931, "eval_ORGANIZATION_f1": 0.9015873015873016, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9044585987261147, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03917478770017624, "eval_overall_accuracy": 0.9857260499588252, "eval_overall_f1": 0.9259723964868257, "eval_overall_precision": 0.924812030075188, "eval_overall_recall": 0.9271356783919598, "eval_runtime": 0.2818, "eval_samples_per_second": 603.321, "eval_steps_per_second": 10.647, "step": 3072 }, { "epoch": 33.0, "grad_norm": 0.7312807440757751, "learning_rate": 3.35e-05, "loss": 0.0566, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.9106382978723405, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8991596638655462, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9015873015873016, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9044585987261147, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.039775025099515915, "eval_overall_accuracy": 0.9865495470765853, "eval_overall_f1": 0.9311639549436797, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2816, "eval_samples_per_second": 603.775, "eval_steps_per_second": 10.655, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.8602070212364197, "learning_rate": 3.3e-05, "loss": 0.0568, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.9224137931034483, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9224137931034483, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.90625, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8950617283950617, "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03961801528930664, "eval_overall_accuracy": 0.987098545155092, "eval_overall_f1": 0.9363295880149813, "eval_overall_precision": 0.9305210918114144, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2818, "eval_samples_per_second": 603.331, "eval_steps_per_second": 10.647, "step": 3264 }, { "epoch": 35.0, "grad_norm": 1.1235308647155762, "learning_rate": 3.2500000000000004e-05, "loss": 0.0532, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8983050847457628, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.9050632911392406, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9050632911392406, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.037914156913757324, "eval_overall_accuracy": 0.987098545155092, "eval_overall_f1": 0.9311639549436797, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2789, "eval_samples_per_second": 609.623, "eval_steps_per_second": 10.758, "step": 3360 }, { "epoch": 36.0, "grad_norm": 0.49202960729599, "learning_rate": 3.2000000000000005e-05, "loss": 0.052, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9125, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9012345679012346, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.04034107178449631, "eval_overall_accuracy": 0.9879220422728521, "eval_overall_f1": 0.9401496259351622, "eval_overall_precision": 0.9331683168316832, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2807, "eval_samples_per_second": 605.629, "eval_steps_per_second": 10.688, "step": 3456 }, { "epoch": 37.0, "grad_norm": 1.104040503501892, "learning_rate": 3.15e-05, "loss": 0.0516, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8983050847457628, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.9056603773584907, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9, "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03864772990345955, "eval_overall_accuracy": 0.9873730441943454, "eval_overall_f1": 0.9313358302122346, "eval_overall_precision": 0.9255583126550868, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2787, "eval_samples_per_second": 609.902, "eval_steps_per_second": 10.763, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.9916018843650818, "learning_rate": 3.1e-05, "loss": 0.0497, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.9106382978723405, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8991596638655462, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9022082018927445, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.89937106918239, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03776225075125694, "eval_overall_accuracy": 0.9879220422728521, "eval_overall_f1": 0.9313358302122346, "eval_overall_precision": 0.9255583126550868, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2818, "eval_samples_per_second": 603.336, "eval_steps_per_second": 10.647, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.43910089135169983, "learning_rate": 3.05e-05, "loss": 0.052, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.9137931034482759, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9137931034482759, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.9090909090909092, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9006211180124224, "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.036575764417648315, "eval_overall_accuracy": 0.9884710403513588, "eval_overall_f1": 0.9349999999999999, "eval_overall_precision": 0.9303482587064676, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2834, "eval_samples_per_second": 599.755, "eval_steps_per_second": 10.584, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.8054441809654236, "learning_rate": 3e-05, "loss": 0.0472, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.9137931034482759, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9137931034482759, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.8987341772151899, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8987341772151899, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03671231493353844, "eval_overall_accuracy": 0.9868240461158386, "eval_overall_f1": 0.9309912170639899, "eval_overall_precision": 0.9298245614035088, "eval_overall_recall": 0.9321608040201005, "eval_runtime": 0.2821, "eval_samples_per_second": 602.601, "eval_steps_per_second": 10.634, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.3345367908477783, "learning_rate": 2.95e-05, "loss": 0.0486, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9191489361702128, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.907563025210084, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.91875, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9074074074074074, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.038836024701595306, "eval_overall_accuracy": 0.9881965413121054, "eval_overall_f1": 0.9402985074626866, "eval_overall_precision": 0.9310344827586207, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2815, "eval_samples_per_second": 603.989, "eval_steps_per_second": 10.659, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.16970324516296387, "learning_rate": 2.9e-05, "loss": 0.047, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9145299145299146, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9067796610169492, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9073482428115016, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9161290322580645, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.037543974816799164, "eval_overall_accuracy": 0.9873730441943454, "eval_overall_f1": 0.9346733668341709, "eval_overall_precision": 0.9346733668341709, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.28, "eval_samples_per_second": 607.074, "eval_steps_per_second": 10.713, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.6317854523658752, "learning_rate": 2.8499999999999998e-05, "loss": 0.0481, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8983050847457628, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.9050632911392406, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9050632911392406, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03800812363624573, "eval_overall_accuracy": 0.9860005489980785, "eval_overall_f1": 0.9311639549436797, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2816, "eval_samples_per_second": 603.734, "eval_steps_per_second": 10.654, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.9596404433250427, "learning_rate": 2.8000000000000003e-05, "loss": 0.0468, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9119496855345911, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.90625, "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03910643607378006, "eval_overall_accuracy": 0.9876475432335987, "eval_overall_f1": 0.94, "eval_overall_precision": 0.9353233830845771, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2799, "eval_samples_per_second": 607.283, "eval_steps_per_second": 10.717, "step": 4224 }, { "epoch": 45.0, "grad_norm": 1.3304390907287598, "learning_rate": 2.7500000000000004e-05, "loss": 0.0473, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.9106382978723405, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.8991596638655462, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9015873015873016, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9044585987261147, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03656316548585892, "eval_overall_accuracy": 0.9868240461158386, "eval_overall_f1": 0.9311639549436797, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9346733668341709, "eval_runtime": 0.2813, "eval_samples_per_second": 604.337, "eval_steps_per_second": 10.665, "step": 4320 }, { "epoch": 46.0, "grad_norm": 0.8157849907875061, "learning_rate": 2.7000000000000002e-05, "loss": 0.0441, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.9152542372881356, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9090909090909092, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9006211180124224, "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03721893951296806, "eval_overall_accuracy": 0.9887455393906122, "eval_overall_f1": 0.9353233830845771, "eval_overall_precision": 0.9261083743842364, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2817, "eval_samples_per_second": 603.43, "eval_steps_per_second": 10.649, "step": 4416 }, { "epoch": 47.0, "grad_norm": 1.0967856645584106, "learning_rate": 2.6500000000000004e-05, "loss": 0.0441, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.9224137931034483, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9224137931034483, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9153605015673981, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.906832298136646, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.037474945187568665, "eval_overall_accuracy": 0.9887455393906122, "eval_overall_f1": 0.94, "eval_overall_precision": 0.9353233830845771, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2834, "eval_samples_per_second": 599.905, "eval_steps_per_second": 10.587, "step": 4512 }, { "epoch": 48.0, "grad_norm": 0.8679947853088379, "learning_rate": 2.6000000000000002e-05, "loss": 0.0416, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.923728813559322, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9333333333333332, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9363057324840764, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03589639067649841, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9498746867167919, "eval_overall_precision": 0.9475, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2828, "eval_samples_per_second": 601.064, "eval_steps_per_second": 10.607, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.5795214176177979, "learning_rate": 2.5500000000000003e-05, "loss": 0.0446, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9152542372881356, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.8958990536277602, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8930817610062893, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03545619174838066, "eval_overall_accuracy": 0.9876475432335987, "eval_overall_f1": 0.9325, "eval_overall_precision": 0.927860696517413, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.2807, "eval_samples_per_second": 605.543, "eval_steps_per_second": 10.686, "step": 4704 }, { "epoch": 50.0, "grad_norm": 1.6378672122955322, "learning_rate": 2.5e-05, "loss": 0.0425, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.9276595744680851, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9159663865546218, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9056603773584907, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9, "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.036550432443618774, "eval_overall_accuracy": 0.9887455393906122, "eval_overall_f1": 0.9376558603491272, "eval_overall_precision": 0.9306930693069307, "eval_overall_recall": 0.9447236180904522, "eval_runtime": 0.2824, "eval_samples_per_second": 601.912, "eval_steps_per_second": 10.622, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.6017678380012512, "learning_rate": 2.45e-05, "loss": 0.0422, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9152542372881356, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.910828025477707, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9166666666666666, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.036360133439302444, "eval_overall_accuracy": 0.987098545155092, "eval_overall_f1": 0.9385194479297366, "eval_overall_precision": 0.9373433583959899, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2813, "eval_samples_per_second": 604.3, "eval_steps_per_second": 10.664, "step": 4896 }, { "epoch": 52.0, "grad_norm": 1.1220182180404663, "learning_rate": 2.4e-05, "loss": 0.0409, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.9184549356223175, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9145299145299145, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.91875, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9074074074074074, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03568459302186966, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9401496259351622, "eval_overall_precision": 0.9331683168316832, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2809, "eval_samples_per_second": 605.214, "eval_steps_per_second": 10.68, "step": 4992 }, { "epoch": 53.0, "grad_norm": 1.0612504482269287, "learning_rate": 2.35e-05, "loss": 0.0414, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9249999999999999, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9135802469135802, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03593689948320389, "eval_overall_accuracy": 0.990118034586879, "eval_overall_f1": 0.9451371571072318, "eval_overall_precision": 0.9381188118811881, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2864, "eval_samples_per_second": 593.537, "eval_steps_per_second": 10.474, "step": 5088 }, { "epoch": 54.0, "grad_norm": 1.130049228668213, "learning_rate": 2.3000000000000003e-05, "loss": 0.0403, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9130434782608695, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8963414634146342, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03527674823999405, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9402985074626866, "eval_overall_precision": 0.9310344827586207, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2806, "eval_samples_per_second": 605.883, "eval_steps_per_second": 10.692, "step": 5184 }, { "epoch": 55.0, "grad_norm": 1.4285669326782227, "learning_rate": 2.25e-05, "loss": 0.0393, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9184549356223175, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9145299145299145, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9249999999999999, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9135802469135802, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03516939654946327, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9426433915211971, "eval_overall_precision": 0.9356435643564357, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2817, "eval_samples_per_second": 603.519, "eval_steps_per_second": 10.65, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.668100893497467, "learning_rate": 2.2000000000000003e-05, "loss": 0.0405, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.923728813559322, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9430379746835443, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9430379746835443, "eval_ORGANIZATION_recall": 0.9430379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03589174896478653, "eval_overall_accuracy": 0.990118034586879, "eval_overall_f1": 0.9536921151439299, "eval_overall_precision": 0.9501246882793017, "eval_overall_recall": 0.957286432160804, "eval_runtime": 0.2822, "eval_samples_per_second": 602.42, "eval_steps_per_second": 10.631, "step": 5376 }, { "epoch": 57.0, "grad_norm": 0.7897509336471558, "learning_rate": 2.15e-05, "loss": 0.0404, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9276595744680851, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9159663865546218, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9400630914826499, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9371069182389937, "eval_ORGANIZATION_recall": 0.9430379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.037044160068035126, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.951310861423221, "eval_overall_precision": 0.9454094292803971, "eval_overall_recall": 0.957286432160804, "eval_runtime": 0.2798, "eval_samples_per_second": 607.511, "eval_steps_per_second": 10.721, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.9230952262878418, "learning_rate": 2.1e-05, "loss": 0.0398, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9337539432176657, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9308176100628931, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.035523511469364166, "eval_overall_accuracy": 0.9903925336261323, "eval_overall_f1": 0.951188986232791, "eval_overall_precision": 0.9476309226932669, "eval_overall_recall": 0.9547738693467337, "eval_runtime": 0.282, "eval_samples_per_second": 602.9, "eval_steps_per_second": 10.639, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.6653354167938232, "learning_rate": 2.05e-05, "loss": 0.0382, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.9396551724137931, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9396551724137931, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9490445859872612, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9551282051282052, "eval_ORGANIZATION_recall": 0.9430379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03545346483588219, "eval_overall_accuracy": 0.9903925336261323, "eval_overall_f1": 0.9584905660377357, "eval_overall_precision": 0.9596977329974811, "eval_overall_recall": 0.957286432160804, "eval_runtime": 0.285, "eval_samples_per_second": 596.488, "eval_steps_per_second": 10.526, "step": 5664 }, { "epoch": 60.0, "grad_norm": 1.0160192251205444, "learning_rate": 2e-05, "loss": 0.0396, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.9276595744680851, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9159663865546218, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9182389937106918, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9125, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03439299017190933, "eval_overall_accuracy": 0.9892945374691189, "eval_overall_f1": 0.9426433915211971, "eval_overall_precision": 0.9356435643564357, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2803, "eval_samples_per_second": 606.413, "eval_steps_per_second": 10.701, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.5373579263687134, "learning_rate": 1.9500000000000003e-05, "loss": 0.0362, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9137380191693291, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9225806451612903, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03558684140443802, "eval_overall_accuracy": 0.9879220422728521, "eval_overall_f1": 0.9408805031446541, "eval_overall_precision": 0.9420654911838791, "eval_overall_recall": 0.9396984924623115, "eval_runtime": 0.2814, "eval_samples_per_second": 604.106, "eval_steps_per_second": 10.661, "step": 5856 }, { "epoch": 62.0, "grad_norm": 1.3705915212631226, "learning_rate": 1.9e-05, "loss": 0.037, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.923728813559322, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.910828025477707, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9166666666666666, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03602343425154686, "eval_overall_accuracy": 0.9881965413121054, "eval_overall_f1": 0.9410288582183186, "eval_overall_precision": 0.9398496240601504, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.2805, "eval_samples_per_second": 606.05, "eval_steps_per_second": 10.695, "step": 5952 }, { "epoch": 63.0, "grad_norm": 0.5670416355133057, "learning_rate": 1.85e-05, "loss": 0.0386, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.9310344827586207, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9310344827586207, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9367088607594937, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.036378536373376846, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9510664993726474, "eval_overall_precision": 0.949874686716792, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2812, "eval_samples_per_second": 604.53, "eval_steps_per_second": 10.668, "step": 6048 }, { "epoch": 64.0, "grad_norm": 1.2524464130401611, "learning_rate": 1.8e-05, "loss": 0.0365, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9152542372881356, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9260450160771704, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9411764705882353, "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.036032553762197495, "eval_overall_accuracy": 0.9887455393906122, "eval_overall_f1": 0.9445843828715367, "eval_overall_precision": 0.946969696969697, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.282, "eval_samples_per_second": 602.84, "eval_steps_per_second": 10.638, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.5007239580154419, "learning_rate": 1.75e-05, "loss": 0.0347, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.923728813559322, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9294871794871796, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9415584415584416, "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.035410117357969284, "eval_overall_accuracy": 0.9887455393906122, "eval_overall_f1": 0.9484276729559749, "eval_overall_precision": 0.9496221662468514, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2862, "eval_samples_per_second": 594.083, "eval_steps_per_second": 10.484, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.7251582741737366, "learning_rate": 1.7000000000000003e-05, "loss": 0.0393, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9396551724137931, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9396551724137931, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.926517571884984, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9354838709677419, "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.036589812487363815, "eval_overall_accuracy": 0.9887455393906122, "eval_overall_f1": 0.9496221662468514, "eval_overall_precision": 0.952020202020202, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2814, "eval_samples_per_second": 604.074, "eval_steps_per_second": 10.66, "step": 6336 }, { "epoch": 67.0, "grad_norm": 1.2316526174545288, "learning_rate": 1.65e-05, "loss": 0.0359, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9240506329113924, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9240506329113924, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.034778717905282974, "eval_overall_accuracy": 0.9892945374691189, "eval_overall_f1": 0.9473684210526316, "eval_overall_precision": 0.945, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2795, "eval_samples_per_second": 608.215, "eval_steps_per_second": 10.733, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.5684135556221008, "learning_rate": 1.6000000000000003e-05, "loss": 0.0331, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9177215189873418, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9177215189873418, "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03474760055541992, "eval_overall_accuracy": 0.9890200384298655, "eval_overall_f1": 0.9448621553884713, "eval_overall_precision": 0.9425, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2799, "eval_samples_per_second": 607.468, "eval_steps_per_second": 10.72, "step": 6528 }, { "epoch": 69.0, "grad_norm": 0.7760136127471924, "learning_rate": 1.55e-05, "loss": 0.0344, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9333333333333332, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9363057324840764, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03407077118754387, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.950943396226415, "eval_overall_precision": 0.9521410579345088, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2833, "eval_samples_per_second": 600.147, "eval_steps_per_second": 10.591, "step": 6624 }, { "epoch": 70.0, "grad_norm": 1.5560885667800903, "learning_rate": 1.5e-05, "loss": 0.0349, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.9396551724137931, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9396551724137931, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9426751592356688, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.034501850605010986, "eval_overall_accuracy": 0.990118034586879, "eval_overall_f1": 0.9547738693467337, "eval_overall_precision": 0.9547738693467337, "eval_overall_recall": 0.9547738693467337, "eval_runtime": 0.28, "eval_samples_per_second": 607.101, "eval_steps_per_second": 10.714, "step": 6720 }, { "epoch": 71.0, "grad_norm": 0.7581639885902405, "learning_rate": 1.45e-05, "loss": 0.0349, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9310344827586207, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9310344827586207, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.926984126984127, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9299363057324841, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03540712222456932, "eval_overall_accuracy": 0.9884710403513588, "eval_overall_f1": 0.9472361809045227, "eval_overall_precision": 0.9472361809045227, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2795, "eval_samples_per_second": 608.239, "eval_steps_per_second": 10.734, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.30566951632499695, "learning_rate": 1.4000000000000001e-05, "loss": 0.0342, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.923728813559322, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.926984126984127, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9299363057324841, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.034342918545007706, "eval_overall_accuracy": 0.9887455393906122, "eval_overall_f1": 0.9473684210526316, "eval_overall_precision": 0.945, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2798, "eval_samples_per_second": 607.48, "eval_steps_per_second": 10.72, "step": 6912 }, { "epoch": 73.0, "grad_norm": 1.0282853841781616, "learning_rate": 1.3500000000000001e-05, "loss": 0.0333, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9240506329113924, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9240506329113924, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03536517545580864, "eval_overall_accuracy": 0.9890200384298655, "eval_overall_f1": 0.9472361809045227, "eval_overall_precision": 0.9472361809045227, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2817, "eval_samples_per_second": 603.375, "eval_steps_per_second": 10.648, "step": 7008 }, { "epoch": 74.0, "grad_norm": 3.3504393100738525, "learning_rate": 1.3000000000000001e-05, "loss": 0.0332, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9240506329113924, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9240506329113924, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.0346221849322319, "eval_overall_accuracy": 0.9892945374691189, "eval_overall_f1": 0.9448621553884713, "eval_overall_precision": 0.9425, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2789, "eval_samples_per_second": 609.459, "eval_steps_per_second": 10.755, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.7059272527694702, "learning_rate": 1.25e-05, "loss": 0.0346, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9310344827586207, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9310344827586207, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9274447949526813, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9245283018867925, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.0342298299074173, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9473684210526316, "eval_overall_precision": 0.945, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2836, "eval_samples_per_second": 599.44, "eval_steps_per_second": 10.578, "step": 7200 }, { "epoch": 76.0, "grad_norm": 1.0325454473495483, "learning_rate": 1.2e-05, "loss": 0.0334, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9224137931034483, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9224137931034483, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9308176100628932, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.925, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.0346398688852787, "eval_overall_accuracy": 0.9903925336261323, "eval_overall_f1": 0.9461827284105131, "eval_overall_precision": 0.942643391521197, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2805, "eval_samples_per_second": 605.977, "eval_steps_per_second": 10.694, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.531468391418457, "learning_rate": 1.1500000000000002e-05, "loss": 0.034, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.9396551724137931, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9396551724137931, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.926984126984127, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9299363057324841, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03495112434029579, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.949748743718593, "eval_overall_precision": 0.949748743718593, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2841, "eval_samples_per_second": 598.465, "eval_steps_per_second": 10.561, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.8816949129104614, "learning_rate": 1.1000000000000001e-05, "loss": 0.0341, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9333333333333332, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9363057324840764, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.034044817090034485, "eval_overall_accuracy": 0.9903925336261323, "eval_overall_f1": 0.9510664993726474, "eval_overall_precision": 0.949874686716792, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2793, "eval_samples_per_second": 608.583, "eval_steps_per_second": 10.74, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.5149128437042236, "learning_rate": 1.05e-05, "loss": 0.033, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9264069264069263, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9304347826086956, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9308176100628932, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.925, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.034786492586135864, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9473684210526316, "eval_overall_precision": 0.945, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2819, "eval_samples_per_second": 602.992, "eval_steps_per_second": 10.641, "step": 7584 }, { "epoch": 80.0, "grad_norm": 1.0311975479125977, "learning_rate": 1e-05, "loss": 0.0308, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.9137931034482759, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9137931034482759, "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.9278996865203761, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9192546583850931, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.0336722694337368, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9425, "eval_overall_precision": 0.9378109452736318, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2809, "eval_samples_per_second": 605.269, "eval_steps_per_second": 10.681, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.3829832971096039, "learning_rate": 9.5e-06, "loss": 0.031, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9224137931034483, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9224137931034483, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9278996865203761, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9192546583850931, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03411416336894035, "eval_overall_accuracy": 0.990118034586879, "eval_overall_f1": 0.9450000000000001, "eval_overall_precision": 0.9402985074626866, "eval_overall_recall": 0.949748743718593, "eval_runtime": 0.2823, "eval_samples_per_second": 602.183, "eval_steps_per_second": 10.627, "step": 7776 }, { "epoch": 82.0, "grad_norm": 0.5665716528892517, "learning_rate": 9e-06, "loss": 0.0315, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.923728813559322, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9333333333333332, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9363057324840764, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03403652459383011, "eval_overall_accuracy": 0.9903925336261323, "eval_overall_f1": 0.9498746867167919, "eval_overall_precision": 0.9475, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2825, "eval_samples_per_second": 601.815, "eval_steps_per_second": 10.62, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.9708644151687622, "learning_rate": 8.500000000000002e-06, "loss": 0.0321, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9367088607594937, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03434957191348076, "eval_overall_accuracy": 0.990118034586879, "eval_overall_f1": 0.9522613065326633, "eval_overall_precision": 0.9522613065326633, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2812, "eval_samples_per_second": 604.564, "eval_steps_per_second": 10.669, "step": 7968 }, { "epoch": 84.0, "grad_norm": 1.0318374633789062, "learning_rate": 8.000000000000001e-06, "loss": 0.0317, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9367088607594937, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03397549316287041, "eval_overall_accuracy": 0.990118034586879, "eval_overall_f1": 0.9522613065326633, "eval_overall_precision": 0.9522613065326633, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2802, "eval_samples_per_second": 606.677, "eval_steps_per_second": 10.706, "step": 8064 }, { "epoch": 85.0, "grad_norm": 1.0523338317871094, "learning_rate": 7.5e-06, "loss": 0.0324, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9184549356223175, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9145299145299145, "eval_LOCATION_recall": 0.9224137931034483, "eval_ORGANIZATION_f1": 0.9245283018867925, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.91875, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.033996351063251495, "eval_overall_accuracy": 0.9892945374691189, "eval_overall_f1": 0.9425, "eval_overall_precision": 0.9378109452736318, "eval_overall_recall": 0.9472361809045227, "eval_runtime": 0.2808, "eval_samples_per_second": 605.465, "eval_steps_per_second": 10.685, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.45407137274742126, "learning_rate": 7.000000000000001e-06, "loss": 0.0317, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9363057324840764, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9423076923076923, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03390591964125633, "eval_overall_accuracy": 0.990118034586879, "eval_overall_f1": 0.9522613065326633, "eval_overall_precision": 0.9522613065326633, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2806, "eval_samples_per_second": 605.825, "eval_steps_per_second": 10.691, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.26913008093833923, "learning_rate": 6.5000000000000004e-06, "loss": 0.0308, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9363057324840764, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9423076923076923, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03465632349252701, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9522613065326633, "eval_overall_precision": 0.9522613065326633, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.28, "eval_samples_per_second": 607.249, "eval_steps_per_second": 10.716, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.30417150259017944, "learning_rate": 6e-06, "loss": 0.0311, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9367088607594937, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.034424372017383575, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9522613065326633, "eval_overall_precision": 0.9522613065326633, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2814, "eval_samples_per_second": 604.076, "eval_steps_per_second": 10.66, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.7772185206413269, "learning_rate": 5.500000000000001e-06, "loss": 0.0295, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9426751592356688, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.034607380628585815, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2806, "eval_samples_per_second": 605.778, "eval_steps_per_second": 10.69, "step": 8544 }, { "epoch": 90.0, "grad_norm": 0.6779617071151733, "learning_rate": 5e-06, "loss": 0.0304, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9426751592356688, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03426329791545868, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2819, "eval_samples_per_second": 603.089, "eval_steps_per_second": 10.643, "step": 8640 }, { "epoch": 91.0, "grad_norm": 1.0395246744155884, "learning_rate": 4.5e-06, "loss": 0.0315, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9426751592356688, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03426933288574219, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2786, "eval_samples_per_second": 610.112, "eval_steps_per_second": 10.767, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.45442330837249756, "learning_rate": 4.000000000000001e-06, "loss": 0.0314, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9426751592356688, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03421960771083832, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.282, "eval_samples_per_second": 602.845, "eval_steps_per_second": 10.638, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.4602469801902771, "learning_rate": 3.5000000000000004e-06, "loss": 0.0322, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9426751592356688, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03397959843277931, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2823, "eval_samples_per_second": 602.296, "eval_steps_per_second": 10.629, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.46450167894363403, "learning_rate": 3e-06, "loss": 0.0303, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9367088607594937, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03426538035273552, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9522613065326633, "eval_overall_precision": 0.9522613065326633, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.281, "eval_samples_per_second": 604.875, "eval_steps_per_second": 10.674, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.763189971446991, "learning_rate": 2.5e-06, "loss": 0.0316, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9367088607594937, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.034257952123880386, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9522613065326633, "eval_overall_precision": 0.9522613065326633, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2819, "eval_samples_per_second": 603.132, "eval_steps_per_second": 10.644, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.3942119777202606, "learning_rate": 2.0000000000000003e-06, "loss": 0.0317, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9426751592356688, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.034202940762043, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2798, "eval_samples_per_second": 607.549, "eval_steps_per_second": 10.721, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.13562001287937164, "learning_rate": 1.5e-06, "loss": 0.0321, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.939297124600639, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9483870967741935, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03407964110374451, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2822, "eval_samples_per_second": 602.4, "eval_steps_per_second": 10.631, "step": 9312 }, { "epoch": 98.0, "grad_norm": 1.3050485849380493, "learning_rate": 1.0000000000000002e-06, "loss": 0.0295, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.939297124600639, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9483870967741935, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.03420589491724968, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.28, "eval_samples_per_second": 607.192, "eval_steps_per_second": 10.715, "step": 9408 }, { "epoch": 99.0, "grad_norm": 0.41996899247169495, "learning_rate": 5.000000000000001e-07, "loss": 0.031, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.939297124600639, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9483870967741935, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.0341360941529274, "eval_overall_accuracy": 0.9898435355476256, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.282, "eval_samples_per_second": 602.821, "eval_steps_per_second": 10.638, "step": 9504 }, { "epoch": 100.0, "grad_norm": 0.4611692428588867, "learning_rate": 0.0, "loss": 0.0299, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.939297124600639, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9483870967741935, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, "eval_loss": 0.0341842956840992, "eval_overall_accuracy": 0.9895690365083722, "eval_overall_f1": 0.9534591194968555, "eval_overall_precision": 0.9546599496221663, "eval_overall_recall": 0.9522613065326633, "eval_runtime": 0.2812, "eval_samples_per_second": 604.608, "eval_steps_per_second": 10.67, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3874712599394304.0, "train_loss": 0.07820479728281499, "train_runtime": 511.6876, "train_samples_per_second": 299.206, "train_steps_per_second": 18.761 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3874712599394304.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }