diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -10,9 +10,9 @@ "log_history": [ { "epoch": 1.0, - "grad_norm": 1.3235913515090942, + "grad_norm": 1.4515146017074585, "learning_rate": 4.9500000000000004e-05, - "loss": 1.0641, + "loss": 1.0545, "step": 96 }, { @@ -29,21 +29,21 @@ "eval_PERSON_number": 124, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, - "eval_loss": 0.6655052900314331, + "eval_loss": 0.6621571183204651, "eval_overall_accuracy": 0.8394180620367828, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, - "eval_runtime": 0.614, - "eval_samples_per_second": 276.861, - "eval_steps_per_second": 4.886, + "eval_runtime": 0.2785, + "eval_samples_per_second": 610.49, + "eval_steps_per_second": 10.773, "step": 96 }, { "epoch": 2.0, - "grad_norm": 1.5239665508270264, + "grad_norm": 2.103937864303589, "learning_rate": 4.9e-05, - "loss": 0.6402, + "loss": 0.64, "step": 192 }, { @@ -60,835 +60,835 @@ "eval_PERSON_number": 124, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, - "eval_loss": 0.5235269665718079, + "eval_loss": 0.5205754637718201, "eval_overall_accuracy": 0.8399670601152895, "eval_overall_f1": 0.009900990099009901, "eval_overall_precision": 0.3333333333333333, "eval_overall_recall": 0.005025125628140704, - "eval_runtime": 0.6248, - "eval_samples_per_second": 272.086, - "eval_steps_per_second": 4.802, + "eval_runtime": 0.2762, + "eval_samples_per_second": 615.56, + "eval_steps_per_second": 10.863, "step": 192 }, { "epoch": 3.0, - "grad_norm": 0.9175276160240173, + "grad_norm": 0.8918240666389465, "learning_rate": 4.85e-05, - "loss": 0.5003, + "loss": 0.503, "step": 288 }, { "epoch": 3.0, - "eval_LOCATION_f1": 0.03076923076923077, + "eval_LOCATION_f1": 0.015625, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.14285714285714285, - "eval_LOCATION_recall": 0.017241379310344827, - "eval_ORGANIZATION_f1": 0.25, + "eval_LOCATION_precision": 0.08333333333333333, + "eval_LOCATION_recall": 0.008620689655172414, + "eval_ORGANIZATION_f1": 0.2436974789915966, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.3918918918918919, + "eval_ORGANIZATION_precision": 0.3625, "eval_ORGANIZATION_recall": 0.18354430379746836, - "eval_PERSON_f1": 0.2764976958525346, - "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.3225806451612903, - "eval_PERSON_recall": 0.24193548387096775, - "eval_loss": 0.3704892694950104, - "eval_overall_accuracy": 0.8709854515509196, - "eval_overall_f1": 0.21070811744386875, - "eval_overall_precision": 0.3370165745856354, - "eval_overall_recall": 0.15326633165829145, - "eval_runtime": 0.6518, - "eval_samples_per_second": 260.796, - "eval_steps_per_second": 4.602, + "eval_PERSON_f1": 0.3214285714285714, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.36, + "eval_PERSON_recall": 0.2903225806451613, + "eval_loss": 0.37283432483673096, + "eval_overall_accuracy": 0.8718089486686796, + "eval_overall_f1": 0.22372881355932206, + "eval_overall_precision": 0.34375, + "eval_overall_recall": 0.1658291457286432, + "eval_runtime": 0.2786, + "eval_samples_per_second": 610.242, + "eval_steps_per_second": 10.769, "step": 288 }, { "epoch": 4.0, - "grad_norm": 1.2730246782302856, + "grad_norm": 1.4870269298553467, "learning_rate": 4.8e-05, - "loss": 0.353, + "loss": 0.3537, "step": 384 }, { "epoch": 4.0, - "eval_LOCATION_f1": 0.34196891191709844, - "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.42857142857142855, - "eval_LOCATION_recall": 0.28448275862068967, - "eval_ORGANIZATION_f1": 0.5100286532951289, - "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.46596858638743455, - "eval_ORGANIZATION_recall": 0.5632911392405063, - "eval_PERSON_f1": 0.6068965517241379, - "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.5301204819277109, - "eval_PERSON_recall": 0.7096774193548387, - "eval_loss": 0.25001201033592224, - "eval_overall_accuracy": 0.9214932747735383, - "eval_overall_f1": 0.5048076923076923, - "eval_overall_precision": 0.4838709677419355, - "eval_overall_recall": 0.5276381909547738, - "eval_runtime": 0.6562, - "eval_samples_per_second": 259.071, - "eval_steps_per_second": 4.572, + "eval_LOCATION_f1": 0.3125, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.39473684210526316, + "eval_LOCATION_recall": 0.25862068965517243, + "eval_ORGANIZATION_f1": 0.5120481927710843, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.4885057471264368, + "eval_ORGANIZATION_recall": 0.5379746835443038, + "eval_PERSON_f1": 0.627177700348432, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.5521472392638037, + "eval_PERSON_recall": 0.7258064516129032, + "eval_loss": 0.2518472671508789, + "eval_overall_accuracy": 0.9198462805380181, + "eval_overall_f1": 0.5055487053020961, + "eval_overall_precision": 0.4963680387409201, + "eval_overall_recall": 0.5150753768844221, + "eval_runtime": 0.2923, + "eval_samples_per_second": 581.551, + "eval_steps_per_second": 10.263, "step": 384 }, { "epoch": 5.0, - "grad_norm": 1.1444727182388306, + "grad_norm": 1.199317455291748, "learning_rate": 4.75e-05, - "loss": 0.2551, + "loss": 0.2513, "step": 480 }, { "epoch": 5.0, - "eval_LOCATION_f1": 0.5650224215246636, - "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.5887850467289719, - "eval_LOCATION_recall": 0.5431034482758621, - "eval_ORGANIZATION_f1": 0.6628895184135977, - "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.6, - "eval_ORGANIZATION_recall": 0.740506329113924, - "eval_PERSON_f1": 0.8625954198473281, - "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.8188405797101449, - "eval_PERSON_recall": 0.9112903225806451, - "eval_loss": 0.18147629499435425, - "eval_overall_accuracy": 0.9516881690914082, - "eval_overall_f1": 0.6992840095465395, - "eval_overall_precision": 0.6659090909090909, - "eval_overall_recall": 0.7361809045226131, - "eval_runtime": 0.6356, - "eval_samples_per_second": 267.464, - "eval_steps_per_second": 4.72, + "eval_LOCATION_f1": 0.5892857142857143, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.6111111111111112, + "eval_LOCATION_recall": 0.5689655172413793, + "eval_ORGANIZATION_f1": 0.6590909090909092, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.5979381443298969, + "eval_ORGANIZATION_recall": 0.7341772151898734, + "eval_PERSON_f1": 0.8571428571428572, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.8028169014084507, + "eval_PERSON_recall": 0.9193548387096774, + "eval_loss": 0.1811506599187851, + "eval_overall_accuracy": 0.9497666758166347, + "eval_overall_f1": 0.7030878859857482, + "eval_overall_precision": 0.6666666666666666, + "eval_overall_recall": 0.7437185929648241, + "eval_runtime": 0.2823, + "eval_samples_per_second": 602.152, + "eval_steps_per_second": 10.626, "step": 480 }, { "epoch": 6.0, - "grad_norm": 0.9454153776168823, + "grad_norm": 0.7906554341316223, "learning_rate": 4.7e-05, - "loss": 0.2031, + "loss": 0.1948, "step": 576 }, { "epoch": 6.0, - "eval_LOCATION_f1": 0.7866108786610879, + "eval_LOCATION_f1": 0.759493670886076, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.7642276422764228, - "eval_LOCATION_recall": 0.8103448275862069, - "eval_ORGANIZATION_f1": 0.7678571428571429, + "eval_LOCATION_precision": 0.743801652892562, + "eval_LOCATION_recall": 0.7758620689655172, + "eval_ORGANIZATION_f1": 0.7659574468085106, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.7247191011235955, - "eval_ORGANIZATION_recall": 0.8164556962025317, - "eval_PERSON_f1": 0.9420849420849421, + "eval_ORGANIZATION_precision": 0.7368421052631579, + "eval_ORGANIZATION_recall": 0.7974683544303798, + "eval_PERSON_f1": 0.9348659003831418, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9037037037037037, + "eval_PERSON_precision": 0.8905109489051095, "eval_PERSON_recall": 0.9838709677419355, - "eval_loss": 0.14112454652786255, - "eval_overall_accuracy": 0.9670601152895965, - "eval_overall_f1": 0.827338129496403, - "eval_overall_precision": 0.7912844036697247, - "eval_overall_recall": 0.8668341708542714, - "eval_runtime": 0.6226, - "eval_samples_per_second": 273.047, - "eval_steps_per_second": 4.818, + "eval_loss": 0.13590684533119202, + "eval_overall_accuracy": 0.9656876200933296, + "eval_overall_f1": 0.8174123337363967, + "eval_overall_precision": 0.7878787878787878, + "eval_overall_recall": 0.8492462311557789, + "eval_runtime": 0.2829, + "eval_samples_per_second": 600.915, + "eval_steps_per_second": 10.604, "step": 576 }, { "epoch": 7.0, - "grad_norm": 1.3707348108291626, + "grad_norm": 1.447137475013733, "learning_rate": 4.6500000000000005e-05, - "loss": 0.1672, + "loss": 0.1623, "step": 672 }, { "epoch": 7.0, - "eval_LOCATION_f1": 0.8166666666666667, + "eval_LOCATION_f1": 0.8050847457627119, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.7903225806451613, - "eval_LOCATION_recall": 0.8448275862068966, - "eval_ORGANIZATION_f1": 0.7914110429447854, + "eval_LOCATION_precision": 0.7916666666666666, + "eval_LOCATION_recall": 0.8189655172413793, + "eval_ORGANIZATION_f1": 0.7852760736196319, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.7678571428571429, - "eval_ORGANIZATION_recall": 0.8164556962025317, - "eval_PERSON_f1": 0.9312977099236642, + "eval_ORGANIZATION_precision": 0.7619047619047619, + "eval_ORGANIZATION_recall": 0.810126582278481, + "eval_PERSON_f1": 0.9457364341085271, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.8840579710144928, + "eval_PERSON_precision": 0.9104477611940298, "eval_PERSON_recall": 0.9838709677419355, - "eval_loss": 0.11533728241920471, - "eval_overall_accuracy": 0.9695306066428767, - "eval_overall_f1": 0.8429951690821256, - "eval_overall_precision": 0.8116279069767441, - "eval_overall_recall": 0.8768844221105527, - "eval_runtime": 0.6081, - "eval_samples_per_second": 279.559, - "eval_steps_per_second": 4.933, + "eval_loss": 0.11087985336780548, + "eval_overall_accuracy": 0.9700796047213834, + "eval_overall_f1": 0.8414634146341463, + "eval_overall_precision": 0.8175355450236966, + "eval_overall_recall": 0.8668341708542714, + "eval_runtime": 0.2806, + "eval_samples_per_second": 605.903, + "eval_steps_per_second": 10.692, "step": 672 }, { "epoch": 8.0, - "grad_norm": 0.7069289684295654, + "grad_norm": 1.1691491603851318, "learning_rate": 4.600000000000001e-05, - "loss": 0.1481, + "loss": 0.1397, "step": 768 }, { "epoch": 8.0, - "eval_LOCATION_f1": 0.8326530612244898, - "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.7906976744186046, - "eval_LOCATION_recall": 0.8793103448275862, - "eval_ORGANIZATION_f1": 0.8301886792452831, - "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.825, - "eval_ORGANIZATION_recall": 0.8354430379746836, - "eval_PERSON_f1": 0.9568627450980393, - "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9312977099236641, - "eval_PERSON_recall": 0.9838709677419355, - "eval_loss": 0.09641187638044357, - "eval_overall_accuracy": 0.9744715893494372, - "eval_overall_f1": 0.8704156479217604, - "eval_overall_precision": 0.8476190476190476, - "eval_overall_recall": 0.8944723618090452, - "eval_runtime": 0.6094, - "eval_samples_per_second": 278.959, - "eval_steps_per_second": 4.923, + "eval_LOCATION_f1": 0.8220338983050848, + "eval_LOCATION_number": 116, + "eval_LOCATION_precision": 0.8083333333333333, + "eval_LOCATION_recall": 0.8362068965517241, + "eval_ORGANIZATION_f1": 0.8220858895705523, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.7976190476190477, + "eval_ORGANIZATION_recall": 0.8481012658227848, + "eval_PERSON_f1": 0.9647058823529411, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9389312977099237, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.09539666771888733, + "eval_overall_accuracy": 0.9739225912709305, + "eval_overall_f1": 0.8665850673194615, + "eval_overall_precision": 0.8448687350835322, + "eval_overall_recall": 0.8894472361809045, + "eval_runtime": 0.2806, + "eval_samples_per_second": 605.754, + "eval_steps_per_second": 10.69, "step": 768 }, { "epoch": 9.0, - "grad_norm": 0.9355798363685608, + "grad_norm": 0.8122084140777588, "learning_rate": 4.55e-05, - "loss": 0.132, + "loss": 0.1266, "step": 864 }, { "epoch": 9.0, - "eval_LOCATION_f1": 0.8641975308641975, + "eval_LOCATION_f1": 0.8559670781893004, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8267716535433071, - "eval_LOCATION_recall": 0.9051724137931034, - "eval_ORGANIZATION_f1": 0.8379204892966361, + "eval_LOCATION_precision": 0.8188976377952756, + "eval_LOCATION_recall": 0.896551724137931, + "eval_ORGANIZATION_f1": 0.8404907975460123, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8106508875739645, + "eval_ORGANIZATION_precision": 0.8154761904761905, "eval_ORGANIZATION_recall": 0.8670886075949367, - "eval_PERSON_f1": 0.9568627450980393, + "eval_PERSON_f1": 0.9609375, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9312977099236641, - "eval_PERSON_recall": 0.9838709677419355, - "eval_loss": 0.08645071089267731, - "eval_overall_accuracy": 0.9766675816634641, + "eval_PERSON_precision": 0.9318181818181818, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.0876610055565834, + "eval_overall_accuracy": 0.9761185835849574, "eval_overall_f1": 0.8824242424242424, "eval_overall_precision": 0.8524590163934426, "eval_overall_recall": 0.914572864321608, - "eval_runtime": 0.6253, - "eval_samples_per_second": 271.886, - "eval_steps_per_second": 4.798, + "eval_runtime": 0.2798, + "eval_samples_per_second": 607.653, + "eval_steps_per_second": 10.723, "step": 864 }, { "epoch": 10.0, - "grad_norm": 0.7637869715690613, + "grad_norm": 0.770068883895874, "learning_rate": 4.5e-05, - "loss": 0.119, + "loss": 0.1157, "step": 960 }, { "epoch": 10.0, - "eval_LOCATION_f1": 0.8666666666666666, + "eval_LOCATION_f1": 0.8823529411764706, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8387096774193549, - "eval_LOCATION_recall": 0.896551724137931, - "eval_ORGANIZATION_f1": 0.857142857142857, + "eval_LOCATION_precision": 0.860655737704918, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.8624999999999999, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8414634146341463, + "eval_ORGANIZATION_precision": 0.8518518518518519, "eval_ORGANIZATION_recall": 0.8734177215189873, - "eval_PERSON_f1": 0.9685039370078741, - "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9461538461538461, - "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.07701876759529114, - "eval_overall_accuracy": 0.978040076859731, - "eval_overall_f1": 0.8946078431372548, - "eval_overall_precision": 0.8732057416267942, - "eval_overall_recall": 0.9170854271356784, - "eval_runtime": 0.625, - "eval_samples_per_second": 271.983, - "eval_steps_per_second": 4.8, + "eval_PERSON_f1": 0.9761904761904763, + "eval_PERSON_number": 124, + "eval_PERSON_precision": 0.9609375, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.07306713610887527, + "eval_overall_accuracy": 0.9799615701345046, + "eval_overall_f1": 0.9037037037037038, + "eval_overall_precision": 0.8883495145631068, + "eval_overall_recall": 0.9195979899497487, + "eval_runtime": 0.2933, + "eval_samples_per_second": 579.604, + "eval_steps_per_second": 10.228, "step": 960 }, { "epoch": 11.0, - "grad_norm": 0.7323551177978516, + "grad_norm": 0.5027669072151184, "learning_rate": 4.4500000000000004e-05, - "loss": 0.1117, + "loss": 0.1111, "step": 1056 }, { "epoch": 11.0, - "eval_LOCATION_f1": 0.8925619834710743, + "eval_LOCATION_f1": 0.8945147679324894, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8571428571428571, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.88125, + "eval_LOCATION_precision": 0.8760330578512396, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.8792569659442724, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8703703703703703, - "eval_ORGANIZATION_recall": 0.8924050632911392, - "eval_PERSON_f1": 0.9761904761904763, + "eval_ORGANIZATION_precision": 0.8606060606060606, + "eval_ORGANIZATION_recall": 0.8987341772151899, + "eval_PERSON_f1": 0.9800796812749005, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9609375, + "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.0722014382481575, - "eval_overall_accuracy": 0.9805105682130113, - "eval_overall_f1": 0.914004914004914, - "eval_overall_precision": 0.8942307692307693, - "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6331, - "eval_samples_per_second": 268.535, - "eval_steps_per_second": 4.739, + "eval_loss": 0.06731364130973816, + "eval_overall_accuracy": 0.9813340653307714, + "eval_overall_f1": 0.9149198520345253, + "eval_overall_precision": 0.8983050847457628, + "eval_overall_recall": 0.9321608040201005, + "eval_runtime": 0.2833, + "eval_samples_per_second": 600.024, + "eval_steps_per_second": 10.589, "step": 1056 }, { "epoch": 12.0, - "grad_norm": 0.8702762722969055, + "grad_norm": 0.663327693939209, "learning_rate": 4.4000000000000006e-05, - "loss": 0.1053, + "loss": 0.1044, "step": 1152 }, { "epoch": 12.0, - "eval_LOCATION_f1": 0.8806584362139918, + "eval_LOCATION_f1": 0.8945147679324894, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.84251968503937, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.8632218844984803, + "eval_LOCATION_precision": 0.8760330578512396, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.8765432098765433, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8304093567251462, + "eval_ORGANIZATION_precision": 0.8554216867469879, "eval_ORGANIZATION_recall": 0.8987341772151899, - "eval_PERSON_f1": 0.9761904761904763, + "eval_PERSON_f1": 0.9800796812749005, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9609375, + "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.07104182243347168, - "eval_overall_accuracy": 0.9791380730167445, - "eval_overall_f1": 0.9029126213592233, - "eval_overall_precision": 0.8732394366197183, - "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6246, - "eval_samples_per_second": 272.165, - "eval_steps_per_second": 4.803, + "eval_loss": 0.06354989856481552, + "eval_overall_accuracy": 0.981059566291518, + "eval_overall_f1": 0.9137931034482758, + "eval_overall_precision": 0.8961352657004831, + "eval_overall_recall": 0.9321608040201005, + "eval_runtime": 0.282, + "eval_samples_per_second": 602.932, + "eval_steps_per_second": 10.64, "step": 1152 }, { "epoch": 13.0, - "grad_norm": 4.306459426879883, + "grad_norm": 1.0790586471557617, "learning_rate": 4.35e-05, - "loss": 0.1015, + "loss": 0.098, "step": 1248 }, { "epoch": 13.0, - "eval_LOCATION_f1": 0.9059829059829059, + "eval_LOCATION_f1": 0.8974358974358974, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8983050847457628, - "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.8765432098765433, + "eval_LOCATION_precision": 0.8898305084745762, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.8722741433021807, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8554216867469879, - "eval_ORGANIZATION_recall": 0.8987341772151899, - "eval_PERSON_f1": 0.9682539682539683, + "eval_ORGANIZATION_precision": 0.8588957055214724, + "eval_ORGANIZATION_recall": 0.8860759493670886, + "eval_PERSON_f1": 0.9840000000000001, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.953125, - "eval_PERSON_recall": 0.9838709677419355, - "eval_loss": 0.05972462147474289, - "eval_overall_accuracy": 0.9818830634092781, - "eval_overall_f1": 0.9135802469135802, - "eval_overall_precision": 0.8980582524271845, - "eval_overall_recall": 0.9296482412060302, - "eval_runtime": 0.6233, - "eval_samples_per_second": 272.73, - "eval_steps_per_second": 4.813, + "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.057803478091955185, + "eval_overall_accuracy": 0.9816085643700247, + "eval_overall_f1": 0.9142857142857144, + "eval_overall_precision": 0.9041769041769042, + "eval_overall_recall": 0.9246231155778895, + "eval_runtime": 0.2823, + "eval_samples_per_second": 602.275, + "eval_steps_per_second": 10.628, "step": 1248 }, { "epoch": 14.0, - "grad_norm": 1.0401694774627686, + "grad_norm": 1.317100167274475, "learning_rate": 4.3e-05, - "loss": 0.0979, + "loss": 0.0939, "step": 1344 }, { "epoch": 14.0, - "eval_LOCATION_f1": 0.8945147679324894, + "eval_LOCATION_f1": 0.8898305084745763, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8760330578512396, - "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.8832807570977917, + "eval_LOCATION_precision": 0.875, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.8749999999999999, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8805031446540881, + "eval_ORGANIZATION_precision": 0.8641975308641975, "eval_ORGANIZATION_recall": 0.8860759493670886, - "eval_PERSON_f1": 0.9800796812749005, + "eval_PERSON_f1": 0.9840000000000001, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_precision": 0.9761904761904762, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.059020232409238815, - "eval_overall_accuracy": 0.9816085643700247, - "eval_overall_f1": 0.9167701863354037, - "eval_overall_precision": 0.9066339066339066, - "eval_overall_recall": 0.9271356783919598, - "eval_runtime": 0.6209, - "eval_samples_per_second": 273.815, - "eval_steps_per_second": 4.832, + "eval_loss": 0.05590749531984329, + "eval_overall_accuracy": 0.9818830634092781, + "eval_overall_f1": 0.9131513647642681, + "eval_overall_precision": 0.9019607843137255, + "eval_overall_recall": 0.9246231155778895, + "eval_runtime": 0.2821, + "eval_samples_per_second": 602.62, + "eval_steps_per_second": 10.634, "step": 1344 }, { "epoch": 15.0, - "grad_norm": 0.8894268274307251, + "grad_norm": 0.728569507598877, "learning_rate": 4.25e-05, - "loss": 0.0929, + "loss": 0.091, "step": 1440 }, { "epoch": 15.0, - "eval_LOCATION_f1": 0.8983050847457628, + "eval_LOCATION_f1": 0.8936170212765958, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8833333333333333, - "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.8765432098765433, + "eval_LOCATION_precision": 0.8823529411764706, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.8685015290519877, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8554216867469879, + "eval_ORGANIZATION_precision": 0.8402366863905325, "eval_ORGANIZATION_recall": 0.8987341772151899, - "eval_PERSON_f1": 0.9761904761904763, + "eval_PERSON_f1": 0.9800796812749005, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9609375, + "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.05802612379193306, - "eval_overall_accuracy": 0.981059566291518, - "eval_overall_f1": 0.9137931034482758, - "eval_overall_precision": 0.8961352657004831, - "eval_overall_recall": 0.9321608040201005, - "eval_runtime": 0.626, - "eval_samples_per_second": 271.549, - "eval_steps_per_second": 4.792, + "eval_loss": 0.05577402561903, + "eval_overall_accuracy": 0.9816085643700247, + "eval_overall_f1": 0.910209102091021, + "eval_overall_precision": 0.891566265060241, + "eval_overall_recall": 0.9296482412060302, + "eval_runtime": 0.2813, + "eval_samples_per_second": 604.331, + "eval_steps_per_second": 10.665, "step": 1440 }, { "epoch": 16.0, - "grad_norm": 1.4922378063201904, + "grad_norm": 1.4055036306381226, "learning_rate": 4.2e-05, - "loss": 0.0881, + "loss": 0.088, "step": 1536 }, { "epoch": 16.0, - "eval_LOCATION_f1": 0.8945147679324894, + "eval_LOCATION_f1": 0.8898305084745763, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8760330578512396, - "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.8545454545454545, + "eval_LOCATION_precision": 0.875, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.8711656441717791, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8197674418604651, - "eval_ORGANIZATION_recall": 0.8924050632911392, - "eval_PERSON_f1": 0.9761904761904763, + "eval_ORGANIZATION_precision": 0.8452380952380952, + "eval_ORGANIZATION_recall": 0.8987341772151899, + "eval_PERSON_f1": 0.9723320158102766, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9609375, + "eval_PERSON_precision": 0.9534883720930233, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.058439191430807114, - "eval_overall_accuracy": 0.9802360691737579, - "eval_overall_f1": 0.9035409035409036, - "eval_overall_precision": 0.8788598574821853, + "eval_loss": 0.05548899993300438, + "eval_overall_accuracy": 0.981059566291518, + "eval_overall_f1": 0.9079754601226994, + "eval_overall_precision": 0.8872901678657075, "eval_overall_recall": 0.9296482412060302, - "eval_runtime": 0.6225, - "eval_samples_per_second": 273.096, - "eval_steps_per_second": 4.819, + "eval_runtime": 0.2808, + "eval_samples_per_second": 605.486, + "eval_steps_per_second": 10.685, "step": 1536 }, { "epoch": 17.0, - "grad_norm": 0.8413049578666687, + "grad_norm": 0.5631800293922424, "learning_rate": 4.15e-05, - "loss": 0.0849, + "loss": 0.0857, "step": 1632 }, { "epoch": 17.0, - "eval_LOCATION_f1": 0.8813559322033899, + "eval_LOCATION_f1": 0.8936170212765958, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8666666666666667, - "eval_LOCATION_recall": 0.896551724137931, - "eval_ORGANIZATION_f1": 0.8805031446540881, + "eval_LOCATION_precision": 0.8823529411764706, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.889589905362776, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.875, - "eval_ORGANIZATION_recall": 0.8860759493670886, - "eval_PERSON_f1": 0.9840000000000001, + "eval_ORGANIZATION_precision": 0.8867924528301887, + "eval_ORGANIZATION_recall": 0.8924050632911392, + "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.05292287841439247, - "eval_overall_accuracy": 0.9818830634092781, - "eval_overall_f1": 0.9129353233830846, - "eval_overall_precision": 0.9039408866995073, - "eval_overall_recall": 0.9221105527638191, - "eval_runtime": 0.623, - "eval_samples_per_second": 272.855, - "eval_steps_per_second": 4.815, + "eval_loss": 0.052291139960289, + "eval_overall_accuracy": 0.9846280538018117, + "eval_overall_f1": 0.9213483146067416, + "eval_overall_precision": 0.9156327543424317, + "eval_overall_recall": 0.9271356783919598, + "eval_runtime": 0.2834, + "eval_samples_per_second": 599.885, + "eval_steps_per_second": 10.586, "step": 1632 }, { "epoch": 18.0, - "grad_norm": 0.9637477993965149, + "grad_norm": 0.8045436143875122, "learning_rate": 4.1e-05, - "loss": 0.0843, + "loss": 0.0809, "step": 1728 }, { "epoch": 18.0, - "eval_LOCATION_f1": 0.9205020920502092, + "eval_LOCATION_f1": 0.8860759493670886, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8943089430894309, - "eval_LOCATION_recall": 0.9482758620689655, - "eval_ORGANIZATION_f1": 0.8785046728971962, + "eval_LOCATION_precision": 0.8677685950413223, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.8819875776397516, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8650306748466258, - "eval_ORGANIZATION_recall": 0.8924050632911392, - "eval_PERSON_f1": 0.9800796812749005, + "eval_ORGANIZATION_precision": 0.8658536585365854, + "eval_ORGANIZATION_recall": 0.8987341772151899, + "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.968503937007874, + "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.05260878801345825, - "eval_overall_accuracy": 0.9829810595662916, - "eval_overall_f1": 0.9223181257706535, - "eval_overall_precision": 0.9055690072639225, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.616, - "eval_samples_per_second": 275.955, - "eval_steps_per_second": 4.87, + "eval_loss": 0.04978971555829048, + "eval_overall_accuracy": 0.9832555586055449, + "eval_overall_f1": 0.9158415841584159, + "eval_overall_precision": 0.9024390243902439, + "eval_overall_recall": 0.9296482412060302, + "eval_runtime": 0.2824, + "eval_samples_per_second": 602.083, + "eval_steps_per_second": 10.625, "step": 1728 }, { "epoch": 19.0, - "grad_norm": 0.7680073976516724, + "grad_norm": 0.8581104278564453, "learning_rate": 4.05e-05, - "loss": 0.0826, + "loss": 0.0773, "step": 1824 }, { "epoch": 19.0, - "eval_LOCATION_f1": 0.9059829059829059, + "eval_LOCATION_f1": 0.8974358974358974, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8983050847457628, - "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.8847352024922118, + "eval_LOCATION_precision": 0.8898305084745762, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.89375, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8711656441717791, - "eval_ORGANIZATION_recall": 0.8987341772151899, + "eval_ORGANIZATION_precision": 0.8827160493827161, + "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.04939362034201622, - "eval_overall_accuracy": 0.9835300576447983, - "eval_overall_f1": 0.9228855721393034, - "eval_overall_precision": 0.9137931034482759, + "eval_loss": 0.048203177750110626, + "eval_overall_accuracy": 0.9843535547625584, + "eval_overall_f1": 0.9240348692403487, + "eval_overall_precision": 0.9160493827160494, "eval_overall_recall": 0.9321608040201005, - "eval_runtime": 0.6255, - "eval_samples_per_second": 271.772, - "eval_steps_per_second": 4.796, + "eval_runtime": 0.281, + "eval_samples_per_second": 604.968, + "eval_steps_per_second": 10.676, "step": 1824 }, { "epoch": 20.0, - "grad_norm": 0.9873287081718445, + "grad_norm": 0.8157017827033997, "learning_rate": 4e-05, - "loss": 0.0775, + "loss": 0.0765, "step": 1920 }, { "epoch": 20.0, - "eval_LOCATION_f1": 0.903765690376569, + "eval_LOCATION_f1": 0.8983050847457628, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8780487804878049, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.88, + "eval_LOCATION_precision": 0.8833333333333333, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.8834355828220858, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8562874251497006, - "eval_ORGANIZATION_recall": 0.9050632911392406, - "eval_PERSON_f1": 0.9840000000000001, + "eval_ORGANIZATION_precision": 0.8571428571428571, + "eval_ORGANIZATION_recall": 0.9113924050632911, + "eval_PERSON_f1": 0.9800796812749005, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_precision": 0.968503937007874, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.050805024802684784, + "eval_loss": 0.05213193967938423, "eval_overall_accuracy": 0.9821575624485315, - "eval_overall_f1": 0.918918918918919, - "eval_overall_precision": 0.8990384615384616, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6257, - "eval_samples_per_second": 271.682, - "eval_steps_per_second": 4.794, + "eval_overall_f1": 0.9175891758917589, + "eval_overall_precision": 0.8987951807228916, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.2817, + "eval_samples_per_second": 603.469, + "eval_steps_per_second": 10.649, "step": 1920 }, { "epoch": 21.0, - "grad_norm": 1.1787539720535278, + "grad_norm": 1.5243802070617676, "learning_rate": 3.9500000000000005e-05, - "loss": 0.0725, + "loss": 0.0754, "step": 2016 }, { "epoch": 21.0, - "eval_LOCATION_f1": 0.9029535864978903, + "eval_LOCATION_f1": 0.8898305084745763, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8842975206611571, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.8834355828220858, + "eval_LOCATION_precision": 0.875, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.8950617283950618, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8571428571428571, - "eval_ORGANIZATION_recall": 0.9113924050632911, + "eval_ORGANIZATION_precision": 0.8734939759036144, + "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.04908851161599159, - "eval_overall_accuracy": 0.9832555586055449, - "eval_overall_f1": 0.9211822660098522, - "eval_overall_precision": 0.9033816425120773, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.627, - "eval_samples_per_second": 271.141, - "eval_steps_per_second": 4.785, + "eval_loss": 0.0483800433576107, + "eval_overall_accuracy": 0.984079055723305, + "eval_overall_f1": 0.9221260815822002, + "eval_overall_precision": 0.9075425790754258, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.2805, + "eval_samples_per_second": 606.104, + "eval_steps_per_second": 10.696, "step": 2016 }, { "epoch": 22.0, - "grad_norm": 1.0315650701522827, + "grad_norm": 1.5905965566635132, "learning_rate": 3.9000000000000006e-05, - "loss": 0.0712, + "loss": 0.072, "step": 2112 }, { "epoch": 22.0, - "eval_LOCATION_f1": 0.9059829059829059, + "eval_LOCATION_f1": 0.8898305084745763, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8983050847457628, - "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.8944099378881988, + "eval_LOCATION_precision": 0.875, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.8792569659442724, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8780487804878049, - "eval_ORGANIZATION_recall": 0.9113924050632911, - "eval_PERSON_f1": 0.9840000000000001, + "eval_ORGANIZATION_precision": 0.8606060606060606, + "eval_ORGANIZATION_recall": 0.8987341772151899, + "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.04578203707933426, + "eval_loss": 0.046878568828105927, "eval_overall_accuracy": 0.9835300576447983, - "eval_overall_f1": 0.9255583126550867, - "eval_overall_precision": 0.9142156862745098, - "eval_overall_recall": 0.9371859296482412, - "eval_runtime": 0.6301, - "eval_samples_per_second": 269.818, - "eval_steps_per_second": 4.761, + "eval_overall_f1": 0.9158415841584159, + "eval_overall_precision": 0.9024390243902439, + "eval_overall_recall": 0.9296482412060302, + "eval_runtime": 0.2799, + "eval_samples_per_second": 607.427, + "eval_steps_per_second": 10.719, "step": 2112 }, { "epoch": 23.0, - "grad_norm": 0.9216381311416626, + "grad_norm": 0.7514567375183105, "learning_rate": 3.85e-05, - "loss": 0.071, + "loss": 0.0689, "step": 2208 }, { "epoch": 23.0, - "eval_LOCATION_f1": 0.8907563025210085, + "eval_LOCATION_f1": 0.8974358974358974, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8688524590163934, - "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.889589905362776, + "eval_LOCATION_precision": 0.8898305084745762, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.9028213166144201, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8867924528301887, - "eval_ORGANIZATION_recall": 0.8924050632911392, + "eval_ORGANIZATION_precision": 0.8944099378881988, + "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.045151468366384506, - "eval_overall_accuracy": 0.984079055723305, - "eval_overall_f1": 0.9203980099502488, - "eval_overall_precision": 0.9113300492610837, - "eval_overall_recall": 0.9296482412060302, - "eval_runtime": 0.6299, - "eval_samples_per_second": 269.88, - "eval_steps_per_second": 4.763, + "eval_loss": 0.044030942022800446, + "eval_overall_accuracy": 0.9843535547625584, + "eval_overall_f1": 0.9276807980049875, + "eval_overall_precision": 0.9207920792079208, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.2806, + "eval_samples_per_second": 605.822, + "eval_steps_per_second": 10.691, "step": 2208 }, { "epoch": 24.0, - "grad_norm": 2.3887767791748047, + "grad_norm": 1.4454855918884277, "learning_rate": 3.8e-05, - "loss": 0.0708, + "loss": 0.0697, "step": 2304 }, { "epoch": 24.0, - "eval_LOCATION_f1": 0.8907563025210085, + "eval_LOCATION_f1": 0.9012875536480686, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8688524590163934, - "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.889589905362776, + "eval_LOCATION_precision": 0.8974358974358975, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.8881789137380192, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8867924528301887, - "eval_ORGANIZATION_recall": 0.8924050632911392, + "eval_ORGANIZATION_precision": 0.896774193548387, + "eval_ORGANIZATION_recall": 0.879746835443038, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.04399710148572922, - "eval_overall_accuracy": 0.9849025528410651, - "eval_overall_f1": 0.9203980099502488, - "eval_overall_precision": 0.9113300492610837, - "eval_overall_recall": 0.9296482412060302, - "eval_runtime": 0.629, - "eval_samples_per_second": 270.279, - "eval_steps_per_second": 4.77, + "eval_loss": 0.04559732973575592, + "eval_overall_accuracy": 0.9846280538018117, + "eval_overall_f1": 0.9232704402515725, + "eval_overall_precision": 0.924433249370277, + "eval_overall_recall": 0.9221105527638191, + "eval_runtime": 0.2825, + "eval_samples_per_second": 601.735, + "eval_steps_per_second": 10.619, "step": 2304 }, { "epoch": 25.0, - "grad_norm": 0.9052558541297913, + "grad_norm": 1.2895970344543457, "learning_rate": 3.7500000000000003e-05, - "loss": 0.0681, + "loss": 0.0656, "step": 2400 }, { "epoch": 25.0, - "eval_LOCATION_f1": 0.8983050847457628, + "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8833333333333333, + "eval_LOCATION_precision": 0.8983050847457628, "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.8999999999999999, + "eval_ORGANIZATION_f1": 0.8867924528301886, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8888888888888888, - "eval_ORGANIZATION_recall": 0.9113924050632911, + "eval_ORGANIZATION_precision": 0.88125, + "eval_ORGANIZATION_recall": 0.8924050632911392, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.0418149009346962, - "eval_overall_accuracy": 0.9854515509195718, - "eval_overall_f1": 0.9267080745341615, - "eval_overall_precision": 0.9164619164619164, - "eval_overall_recall": 0.9371859296482412, - "eval_runtime": 0.6322, - "eval_samples_per_second": 268.906, - "eval_steps_per_second": 4.745, + "eval_loss": 0.04359252005815506, + "eval_overall_accuracy": 0.9846280538018117, + "eval_overall_f1": 0.9238451935081149, + "eval_overall_precision": 0.9181141439205955, + "eval_overall_recall": 0.9296482412060302, + "eval_runtime": 0.2817, + "eval_samples_per_second": 603.476, + "eval_steps_per_second": 10.65, "step": 2400 }, { "epoch": 26.0, - "grad_norm": 0.3684695065021515, + "grad_norm": 0.6096397638320923, "learning_rate": 3.7e-05, - "loss": 0.0637, + "loss": 0.0658, "step": 2496 }, { "epoch": 26.0, - "eval_LOCATION_f1": 0.9106382978723405, + "eval_LOCATION_f1": 0.9012875536480686, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8991596638655462, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.897196261682243, + "eval_LOCATION_precision": 0.8974358974358975, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.88125, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8834355828220859, - "eval_ORGANIZATION_recall": 0.9113924050632911, + "eval_ORGANIZATION_precision": 0.8703703703703703, + "eval_ORGANIZATION_recall": 0.8924050632911392, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.041572462767362595, - "eval_overall_accuracy": 0.9860005489980785, - "eval_overall_f1": 0.9291925465838509, - "eval_overall_precision": 0.918918918918919, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6288, - "eval_samples_per_second": 270.365, - "eval_steps_per_second": 4.771, + "eval_loss": 0.0426979586482048, + "eval_overall_accuracy": 0.984079055723305, + "eval_overall_f1": 0.9201995012468827, + "eval_overall_precision": 0.9133663366336634, + "eval_overall_recall": 0.9271356783919598, + "eval_runtime": 0.2821, + "eval_samples_per_second": 602.528, + "eval_steps_per_second": 10.633, "step": 2496 }, { "epoch": 27.0, - "grad_norm": 1.3911359310150146, + "grad_norm": 1.2615808248519897, "learning_rate": 3.65e-05, - "loss": 0.0644, + "loss": 0.065, "step": 2592 }, { "epoch": 27.0, - "eval_LOCATION_f1": 0.9067796610169492, + "eval_LOCATION_f1": 0.9051724137931035, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.8916666666666667, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.8978328173374612, + "eval_LOCATION_precision": 0.9051724137931034, + "eval_LOCATION_recall": 0.9051724137931034, + "eval_ORGANIZATION_f1": 0.897196261682243, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8787878787878788, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.8834355828220859, + "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.04298856854438782, - "eval_overall_accuracy": 0.9860005489980785, - "eval_overall_f1": 0.9282178217821783, - "eval_overall_precision": 0.9146341463414634, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6314, - "eval_samples_per_second": 269.228, - "eval_steps_per_second": 4.751, + "eval_loss": 0.04214628413319588, + "eval_overall_accuracy": 0.9854515509195718, + "eval_overall_f1": 0.9276807980049875, + "eval_overall_precision": 0.9207920792079208, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.2802, + "eval_samples_per_second": 606.749, + "eval_steps_per_second": 10.707, "step": 2592 }, { "epoch": 28.0, - "grad_norm": 1.201261281967163, + "grad_norm": 1.0637108087539673, "learning_rate": 3.6e-05, - "loss": 0.0617, + "loss": 0.0613, "step": 2688 }, { "epoch": 28.0, - "eval_LOCATION_f1": 0.9191489361702128, + "eval_LOCATION_f1": 0.8983050847457628, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.907563025210084, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9040247678018577, + "eval_LOCATION_precision": 0.8833333333333333, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.8965517241379312, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8848484848484849, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.8881987577639752, + "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.042760126292705536, - "eval_overall_accuracy": 0.9865495470765853, - "eval_overall_f1": 0.9343246592317224, - "eval_overall_precision": 0.921760391198044, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.627, - "eval_samples_per_second": 271.117, - "eval_steps_per_second": 4.784, + "eval_loss": 0.041786737740039825, + "eval_overall_accuracy": 0.9854515509195718, + "eval_overall_f1": 0.9253731343283583, + "eval_overall_precision": 0.916256157635468, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.281, + "eval_samples_per_second": 604.884, + "eval_steps_per_second": 10.674, "step": 2688 }, { "epoch": 29.0, - "grad_norm": 0.6504961848258972, + "grad_norm": 0.4293781816959381, "learning_rate": 3.55e-05, - "loss": 0.0615, + "loss": 0.0591, "step": 2784 }, { "epoch": 29.0, - "eval_LOCATION_f1": 0.923076923076923, + "eval_LOCATION_f1": 0.9098712446351932, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9152542372881356, - "eval_LOCATION_recall": 0.9310344827586207, + "eval_LOCATION_precision": 0.905982905982906, + "eval_LOCATION_recall": 0.9137931034482759, "eval_ORGANIZATION_f1": 0.8965517241379312, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.8881987577639752, @@ -897,1013 +897,1013 @@ "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.04017549008131027, - "eval_overall_accuracy": 0.9862750480373319, - "eval_overall_f1": 0.9326683291770573, - "eval_overall_precision": 0.9257425742574258, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6891, - "eval_samples_per_second": 246.703, - "eval_steps_per_second": 4.354, + "eval_loss": 0.039750631898641586, + "eval_overall_accuracy": 0.9873730441943454, + "eval_overall_f1": 0.9288389513108615, + "eval_overall_precision": 0.9230769230769231, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.2819, + "eval_samples_per_second": 603.055, + "eval_steps_per_second": 10.642, "step": 2784 }, { "epoch": 30.0, - "grad_norm": 1.9796297550201416, + "grad_norm": 1.076916217803955, "learning_rate": 3.5e-05, - "loss": 0.055, + "loss": 0.06, "step": 2880 }, { "epoch": 30.0, - "eval_LOCATION_f1": 0.9184549356223175, + "eval_LOCATION_f1": 0.9098712446351932, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9145299145299145, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9068322981366461, + "eval_LOCATION_precision": 0.905982905982906, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.9022082018927445, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8902439024390244, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.89937106918239, + "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03879169002175331, - "eval_overall_accuracy": 0.9868240461158386, - "eval_overall_f1": 0.9353233830845771, - "eval_overall_precision": 0.9261083743842364, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6318, - "eval_samples_per_second": 269.066, - "eval_steps_per_second": 4.748, + "eval_loss": 0.03954719007015228, + "eval_overall_accuracy": 0.9865495470765853, + "eval_overall_f1": 0.9311639549436797, + "eval_overall_precision": 0.9276807980049875, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.2821, + "eval_samples_per_second": 602.55, + "eval_steps_per_second": 10.633, "step": 2880 }, { "epoch": 31.0, - "grad_norm": 0.7729928493499756, + "grad_norm": 0.6545688509941101, "learning_rate": 3.45e-05, - "loss": 0.0582, + "loss": 0.0566, "step": 2976 }, { "epoch": 31.0, - "eval_LOCATION_f1": 0.9191489361702128, + "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.907563025210084, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9107692307692307, + "eval_LOCATION_precision": 0.8983050847457628, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.89375, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8862275449101796, - "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_ORGANIZATION_precision": 0.8827160493827161, + "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03867227956652641, - "eval_overall_accuracy": 0.9873730441943454, - "eval_overall_f1": 0.9369592088998764, - "eval_overall_precision": 0.9221411192214112, - "eval_overall_recall": 0.9522613065326633, - "eval_runtime": 0.6223, - "eval_samples_per_second": 273.165, - "eval_steps_per_second": 4.821, + "eval_loss": 0.03855058550834656, + "eval_overall_accuracy": 0.9862750480373319, + "eval_overall_f1": 0.9265255292652553, + "eval_overall_precision": 0.9185185185185185, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.279, + "eval_samples_per_second": 609.398, + "eval_steps_per_second": 10.754, "step": 2976 }, { "epoch": 32.0, - "grad_norm": 1.2700538635253906, + "grad_norm": 1.2271991968154907, "learning_rate": 3.4000000000000007e-05, - "loss": 0.0567, + "loss": 0.0566, "step": 3072 }, { "epoch": 32.0, - "eval_LOCATION_f1": 0.9191489361702128, + "eval_LOCATION_f1": 0.8927038626609441, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.907563025210084, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.8999999999999999, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.896551724137931, + "eval_ORGANIZATION_f1": 0.9015873015873016, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8888888888888888, - "eval_ORGANIZATION_recall": 0.9113924050632911, + "eval_ORGANIZATION_precision": 0.9044585987261147, + "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03720666840672493, - "eval_overall_accuracy": 0.9879220422728521, - "eval_overall_f1": 0.9328358208955224, - "eval_overall_precision": 0.9236453201970444, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6263, - "eval_samples_per_second": 271.437, - "eval_steps_per_second": 4.79, + "eval_loss": 0.03917478770017624, + "eval_overall_accuracy": 0.9857260499588252, + "eval_overall_f1": 0.9259723964868257, + "eval_overall_precision": 0.924812030075188, + "eval_overall_recall": 0.9271356783919598, + "eval_runtime": 0.2818, + "eval_samples_per_second": 603.321, + "eval_steps_per_second": 10.647, "step": 3072 }, { "epoch": 33.0, - "grad_norm": 0.7994013428688049, + "grad_norm": 0.7312807440757751, "learning_rate": 3.35e-05, - "loss": 0.0581, + "loss": 0.0566, "step": 3168 }, { "epoch": 33.0, - "eval_LOCATION_f1": 0.9152542372881356, + "eval_LOCATION_f1": 0.9106382978723405, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9068322981366461, + "eval_LOCATION_precision": 0.8991596638655462, + "eval_LOCATION_recall": 0.9224137931034483, + "eval_ORGANIZATION_f1": 0.9015873015873016, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.8902439024390244, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9044585987261147, + "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.038004301488399506, - "eval_overall_accuracy": 0.9876475432335987, - "eval_overall_f1": 0.9343246592317224, - "eval_overall_precision": 0.921760391198044, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6369, - "eval_samples_per_second": 266.898, - "eval_steps_per_second": 4.71, + "eval_loss": 0.039775025099515915, + "eval_overall_accuracy": 0.9865495470765853, + "eval_overall_f1": 0.9311639549436797, + "eval_overall_precision": 0.9276807980049875, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.2816, + "eval_samples_per_second": 603.775, + "eval_steps_per_second": 10.655, "step": 3168 }, { "epoch": 34.0, - "grad_norm": 1.298720121383667, + "grad_norm": 0.8602070212364197, "learning_rate": 3.3e-05, - "loss": 0.0534, + "loss": 0.0568, "step": 3264 }, { "epoch": 34.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.9224137931034483, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9254658385093167, + "eval_LOCATION_precision": 0.9224137931034483, + "eval_LOCATION_recall": 0.9224137931034483, + "eval_ORGANIZATION_f1": 0.90625, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9085365853658537, - "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_ORGANIZATION_precision": 0.8950617283950617, + "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.039352480322122574, - "eval_overall_accuracy": 0.9876475432335987, - "eval_overall_f1": 0.946450809464508, - "eval_overall_precision": 0.9382716049382716, - "eval_overall_recall": 0.9547738693467337, - "eval_runtime": 0.6236, - "eval_samples_per_second": 272.632, - "eval_steps_per_second": 4.811, + "eval_loss": 0.03961801528930664, + "eval_overall_accuracy": 0.987098545155092, + "eval_overall_f1": 0.9363295880149813, + "eval_overall_precision": 0.9305210918114144, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.2818, + "eval_samples_per_second": 603.331, + "eval_steps_per_second": 10.647, "step": 3264 }, { "epoch": 35.0, - "grad_norm": 1.8338191509246826, + "grad_norm": 1.1235308647155762, "learning_rate": 3.2500000000000004e-05, - "loss": 0.0536, + "loss": 0.0532, "step": 3360 }, { "epoch": 35.0, - "eval_LOCATION_f1": 0.923076923076923, + "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9152542372881356, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9254658385093167, + "eval_LOCATION_precision": 0.8983050847457628, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.9050632911392406, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9085365853658537, - "eval_ORGANIZATION_recall": 0.9430379746835443, + "eval_ORGANIZATION_precision": 0.9050632911392406, + "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03686641529202461, - "eval_overall_accuracy": 0.9887455393906122, - "eval_overall_f1": 0.9440993788819877, - "eval_overall_precision": 0.9336609336609336, - "eval_overall_recall": 0.9547738693467337, - "eval_runtime": 0.6916, - "eval_samples_per_second": 245.813, - "eval_steps_per_second": 4.338, + "eval_loss": 0.037914156913757324, + "eval_overall_accuracy": 0.987098545155092, + "eval_overall_f1": 0.9311639549436797, + "eval_overall_precision": 0.9276807980049875, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.2789, + "eval_samples_per_second": 609.623, + "eval_steps_per_second": 10.758, "step": 3360 }, { "epoch": 36.0, - "grad_norm": 1.2448095083236694, + "grad_norm": 0.49202960729599, "learning_rate": 3.2000000000000005e-05, - "loss": 0.0511, + "loss": 0.052, "step": 3456 }, { "epoch": 36.0, - "eval_LOCATION_f1": 0.9243697478991596, + "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9016393442622951, - "eval_LOCATION_recall": 0.9482758620689655, - "eval_ORGANIZATION_f1": 0.9211356466876972, + "eval_LOCATION_precision": 0.9230769230769231, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9125, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9182389937106918, + "eval_ORGANIZATION_precision": 0.9012345679012346, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.04009932279586792, - "eval_overall_accuracy": 0.9876475432335987, - "eval_overall_f1": 0.9427860696517413, - "eval_overall_precision": 0.9334975369458128, - "eval_overall_recall": 0.9522613065326633, - "eval_runtime": 0.6343, - "eval_samples_per_second": 268.001, - "eval_steps_per_second": 4.729, + "eval_loss": 0.04034107178449631, + "eval_overall_accuracy": 0.9879220422728521, + "eval_overall_f1": 0.9401496259351622, + "eval_overall_precision": 0.9331683168316832, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.2807, + "eval_samples_per_second": 605.629, + "eval_steps_per_second": 10.688, "step": 3456 }, { "epoch": 37.0, - "grad_norm": 1.1896450519561768, + "grad_norm": 1.104040503501892, "learning_rate": 3.15e-05, - "loss": 0.0512, + "loss": 0.0516, "step": 3552 }, { "epoch": 37.0, - "eval_LOCATION_f1": 0.9224137931034483, + "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9224137931034483, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9206349206349206, + "eval_LOCATION_precision": 0.8983050847457628, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.9056603773584907, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9235668789808917, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9, + "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.038715094327926636, - "eval_overall_accuracy": 0.9868240461158386, - "eval_overall_f1": 0.9422110552763819, - "eval_overall_precision": 0.9422110552763819, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6255, - "eval_samples_per_second": 271.794, - "eval_steps_per_second": 4.796, + "eval_loss": 0.03864772990345955, + "eval_overall_accuracy": 0.9873730441943454, + "eval_overall_f1": 0.9313358302122346, + "eval_overall_precision": 0.9255583126550868, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.2787, + "eval_samples_per_second": 609.902, + "eval_steps_per_second": 10.763, "step": 3552 }, { "epoch": 38.0, - "grad_norm": 0.7601023316383362, + "grad_norm": 0.9916018843650818, "learning_rate": 3.1e-05, - "loss": 0.0498, + "loss": 0.0497, "step": 3648 }, { "epoch": 38.0, - "eval_LOCATION_f1": 0.9184549356223175, + "eval_LOCATION_f1": 0.9106382978723405, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9145299145299145, + "eval_LOCATION_precision": 0.8991596638655462, "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9148264984227129, + "eval_ORGANIZATION_f1": 0.9022082018927445, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9119496855345912, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.89937106918239, + "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03696443885564804, + "eval_loss": 0.03776225075125694, "eval_overall_accuracy": 0.9879220422728521, - "eval_overall_f1": 0.9386733416770965, - "eval_overall_precision": 0.9351620947630923, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6226, - "eval_samples_per_second": 273.03, - "eval_steps_per_second": 4.818, + "eval_overall_f1": 0.9313358302122346, + "eval_overall_precision": 0.9255583126550868, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.2818, + "eval_samples_per_second": 603.336, + "eval_steps_per_second": 10.647, "step": 3648 }, { "epoch": 39.0, - "grad_norm": 1.1390430927276611, + "grad_norm": 0.43910089135169983, "learning_rate": 3.05e-05, - "loss": 0.0489, + "loss": 0.052, "step": 3744 }, { "epoch": 39.0, - "eval_LOCATION_f1": 0.927038626609442, + "eval_LOCATION_f1": 0.9137931034482759, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9230769230769231, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9192546583850932, + "eval_LOCATION_precision": 0.9137931034482759, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.9090909090909092, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9024390243902439, - "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_ORGANIZATION_precision": 0.9006211180124224, + "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03947949409484863, - "eval_overall_accuracy": 0.9876475432335987, - "eval_overall_f1": 0.9427860696517413, - "eval_overall_precision": 0.9334975369458128, - "eval_overall_recall": 0.9522613065326633, - "eval_runtime": 0.6271, - "eval_samples_per_second": 271.101, - "eval_steps_per_second": 4.784, + "eval_loss": 0.036575764417648315, + "eval_overall_accuracy": 0.9884710403513588, + "eval_overall_f1": 0.9349999999999999, + "eval_overall_precision": 0.9303482587064676, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.2834, + "eval_samples_per_second": 599.755, + "eval_steps_per_second": 10.584, "step": 3744 }, { "epoch": 40.0, - "grad_norm": 1.1784359216690063, + "grad_norm": 0.8054441809654236, "learning_rate": 3e-05, - "loss": 0.0484, + "loss": 0.0472, "step": 3840 }, { "epoch": 40.0, - "eval_LOCATION_f1": 0.9184549356223175, + "eval_LOCATION_f1": 0.9137931034482759, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9145299145299145, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9015873015873016, + "eval_LOCATION_precision": 0.9137931034482759, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.8987341772151899, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9044585987261147, + "eval_ORGANIZATION_precision": 0.8987341772151899, "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03872258961200714, - "eval_overall_accuracy": 0.9862750480373319, - "eval_overall_f1": 0.9335006273525721, - "eval_overall_precision": 0.9323308270676691, - "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6311, - "eval_samples_per_second": 269.362, - "eval_steps_per_second": 4.753, + "eval_loss": 0.03671231493353844, + "eval_overall_accuracy": 0.9868240461158386, + "eval_overall_f1": 0.9309912170639899, + "eval_overall_precision": 0.9298245614035088, + "eval_overall_recall": 0.9321608040201005, + "eval_runtime": 0.2821, + "eval_samples_per_second": 602.601, + "eval_steps_per_second": 10.634, "step": 3840 }, { "epoch": 41.0, - "grad_norm": 0.21162720024585724, + "grad_norm": 0.3345367908477783, "learning_rate": 2.95e-05, - "loss": 0.0482, + "loss": 0.0486, "step": 3936 }, { "epoch": 41.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.9191489361702128, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, + "eval_LOCATION_precision": 0.907563025210084, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9249999999999999, + "eval_ORGANIZATION_f1": 0.91875, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9135802469135802, - "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_ORGANIZATION_precision": 0.9074074074074074, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.038246579468250275, - "eval_overall_accuracy": 0.9876475432335987, - "eval_overall_f1": 0.9463171036204745, - "eval_overall_precision": 0.9404466501240695, - "eval_overall_recall": 0.9522613065326633, - "eval_runtime": 0.6407, - "eval_samples_per_second": 265.354, - "eval_steps_per_second": 4.683, + "eval_loss": 0.038836024701595306, + "eval_overall_accuracy": 0.9881965413121054, + "eval_overall_f1": 0.9402985074626866, + "eval_overall_precision": 0.9310344827586207, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2815, + "eval_samples_per_second": 603.989, + "eval_steps_per_second": 10.659, "step": 3936 }, { "epoch": 42.0, - "grad_norm": 0.4566030502319336, + "grad_norm": 0.16970324516296387, "learning_rate": 2.9e-05, - "loss": 0.0466, + "loss": 0.047, "step": 4032 }, { "epoch": 42.0, - "eval_LOCATION_f1": 0.923076923076923, + "eval_LOCATION_f1": 0.9145299145299146, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9152542372881356, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9211356466876972, + "eval_LOCATION_precision": 0.9067796610169492, + "eval_LOCATION_recall": 0.9224137931034483, + "eval_ORGANIZATION_f1": 0.9073482428115016, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9182389937106918, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9161290322580645, + "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03999612480401993, - "eval_overall_accuracy": 0.987098545155092, - "eval_overall_f1": 0.9425, - "eval_overall_precision": 0.9378109452736318, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6254, - "eval_samples_per_second": 271.821, - "eval_steps_per_second": 4.797, + "eval_loss": 0.037543974816799164, + "eval_overall_accuracy": 0.9873730441943454, + "eval_overall_f1": 0.9346733668341709, + "eval_overall_precision": 0.9346733668341709, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.28, + "eval_samples_per_second": 607.074, + "eval_steps_per_second": 10.713, "step": 4032 }, { "epoch": 43.0, - "grad_norm": 1.2713156938552856, + "grad_norm": 0.6317854523658752, "learning_rate": 2.8499999999999998e-05, - "loss": 0.0461, + "loss": 0.0481, "step": 4128 }, { "epoch": 43.0, - "eval_LOCATION_f1": 0.9137931034482759, + "eval_LOCATION_f1": 0.9059829059829059, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9137931034482759, + "eval_LOCATION_precision": 0.8983050847457628, "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.9022082018927445, + "eval_ORGANIZATION_f1": 0.9050632911392406, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.89937106918239, + "eval_ORGANIZATION_precision": 0.9050632911392406, "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03929824382066727, - "eval_overall_accuracy": 0.9868240461158386, - "eval_overall_f1": 0.9323308270676693, - "eval_overall_precision": 0.93, + "eval_loss": 0.03800812363624573, + "eval_overall_accuracy": 0.9860005489980785, + "eval_overall_f1": 0.9311639549436797, + "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9346733668341709, - "eval_runtime": 0.6205, - "eval_samples_per_second": 273.958, - "eval_steps_per_second": 4.835, + "eval_runtime": 0.2816, + "eval_samples_per_second": 603.734, + "eval_steps_per_second": 10.654, "step": 4128 }, { "epoch": 44.0, - "grad_norm": 0.5311025977134705, + "grad_norm": 0.9596404433250427, "learning_rate": 2.8000000000000003e-05, - "loss": 0.0462, + "loss": 0.0468, "step": 4224 }, { "epoch": 44.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, + "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9171974522292994, + "eval_ORGANIZATION_f1": 0.9119496855345911, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9230769230769231, - "eval_ORGANIZATION_recall": 0.9113924050632911, - "eval_PERSON_f1": 0.976, + "eval_ORGANIZATION_precision": 0.90625, + "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9682539682539683, - "eval_PERSON_recall": 0.9838709677419355, - "eval_loss": 0.03938368707895279, - "eval_overall_accuracy": 0.9865495470765853, - "eval_overall_f1": 0.9396984924623115, - "eval_overall_precision": 0.9396984924623115, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.627, - "eval_samples_per_second": 271.145, - "eval_steps_per_second": 4.785, + "eval_PERSON_precision": 0.984, + "eval_PERSON_recall": 0.9919354838709677, + "eval_loss": 0.03910643607378006, + "eval_overall_accuracy": 0.9876475432335987, + "eval_overall_f1": 0.94, + "eval_overall_precision": 0.9353233830845771, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.2799, + "eval_samples_per_second": 607.283, + "eval_steps_per_second": 10.717, "step": 4224 }, { "epoch": 45.0, - "grad_norm": 1.5404696464538574, + "grad_norm": 1.3304390907287598, "learning_rate": 2.7500000000000004e-05, - "loss": 0.0447, + "loss": 0.0473, "step": 4320 }, { "epoch": 45.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9106382978723405, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, + "eval_LOCATION_precision": 0.8991596638655462, "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9206349206349206, + "eval_ORGANIZATION_f1": 0.9015873015873016, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9235668789808917, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9044585987261147, + "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03845032677054405, - "eval_overall_accuracy": 0.9873730441943454, - "eval_overall_f1": 0.9433962264150944, - "eval_overall_precision": 0.9445843828715366, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6186, - "eval_samples_per_second": 274.813, - "eval_steps_per_second": 4.85, + "eval_loss": 0.03656316548585892, + "eval_overall_accuracy": 0.9868240461158386, + "eval_overall_f1": 0.9311639549436797, + "eval_overall_precision": 0.9276807980049875, + "eval_overall_recall": 0.9346733668341709, + "eval_runtime": 0.2813, + "eval_samples_per_second": 604.337, + "eval_steps_per_second": 10.665, "step": 4320 }, { "epoch": 46.0, - "grad_norm": 0.4397522807121277, + "grad_norm": 0.8157849907875061, "learning_rate": 2.7000000000000002e-05, - "loss": 0.0442, + "loss": 0.0441, "step": 4416 }, { "epoch": 46.0, - "eval_LOCATION_f1": 0.935064935064935, + "eval_LOCATION_f1": 0.9152542372881356, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9182389937106918, + "eval_ORGANIZATION_f1": 0.9090909090909092, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9125, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9006211180124224, + "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03776326775550842, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9448621553884713, - "eval_overall_precision": 0.9425, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6325, - "eval_samples_per_second": 268.781, - "eval_steps_per_second": 4.743, + "eval_loss": 0.03721893951296806, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9353233830845771, + "eval_overall_precision": 0.9261083743842364, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.2817, + "eval_samples_per_second": 603.43, + "eval_steps_per_second": 10.649, "step": 4416 }, { "epoch": 47.0, - "grad_norm": 1.0225719213485718, + "grad_norm": 1.0967856645584106, "learning_rate": 2.6500000000000004e-05, - "loss": 0.0456, + "loss": 0.0441, "step": 4512 }, { "epoch": 47.0, - "eval_LOCATION_f1": 0.9304347826086956, + "eval_LOCATION_f1": 0.9224137931034483, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9385964912280702, + "eval_LOCATION_precision": 0.9224137931034483, "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9316770186335404, + "eval_ORGANIZATION_f1": 0.9153605015673981, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9146341463414634, - "eval_ORGANIZATION_recall": 0.9493670886075949, + "eval_ORGANIZATION_precision": 0.906832298136646, + "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.037870533764362335, - "eval_overall_accuracy": 0.9892945374691189, - "eval_overall_f1": 0.9488139825218478, - "eval_overall_precision": 0.9429280397022333, - "eval_overall_recall": 0.9547738693467337, - "eval_runtime": 0.6338, - "eval_samples_per_second": 268.242, - "eval_steps_per_second": 4.734, + "eval_loss": 0.037474945187568665, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.94, + "eval_overall_precision": 0.9353233830845771, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.2834, + "eval_samples_per_second": 599.905, + "eval_steps_per_second": 10.587, "step": 4512 }, { "epoch": 48.0, - "grad_norm": 0.7907796502113342, + "grad_norm": 0.8679947853088379, "learning_rate": 2.6000000000000002e-05, - "loss": 0.0444, + "loss": 0.0416, "step": 4608 }, { "epoch": 48.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9182389937106918, + "eval_LOCATION_precision": 0.923728813559322, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9333333333333332, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9125, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9363057324840764, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03684180602431297, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9436795994993743, - "eval_overall_precision": 0.940149625935162, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6242, - "eval_samples_per_second": 272.342, - "eval_steps_per_second": 4.806, + "eval_loss": 0.03589639067649841, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9498746867167919, + "eval_overall_precision": 0.9475, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2828, + "eval_samples_per_second": 601.064, + "eval_steps_per_second": 10.607, "step": 4608 }, { "epoch": 49.0, - "grad_norm": 1.0734015703201294, + "grad_norm": 0.5795214176177979, "learning_rate": 2.5500000000000003e-05, - "loss": 0.045, + "loss": 0.0446, "step": 4704 }, { "epoch": 49.0, - "eval_LOCATION_f1": 0.9316239316239315, + "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.923728813559322, - "eval_LOCATION_recall": 0.9396551724137931, - "eval_ORGANIZATION_f1": 0.9240506329113924, + "eval_LOCATION_precision": 0.9152542372881356, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.8958990536277602, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9240506329113924, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.8930817610062893, + "eval_ORGANIZATION_recall": 0.8987341772151899, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03768336772918701, - "eval_overall_accuracy": 0.9887455393906122, - "eval_overall_f1": 0.9461827284105131, - "eval_overall_precision": 0.942643391521197, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.623, - "eval_samples_per_second": 272.878, - "eval_steps_per_second": 4.815, + "eval_loss": 0.03545619174838066, + "eval_overall_accuracy": 0.9876475432335987, + "eval_overall_f1": 0.9325, + "eval_overall_precision": 0.927860696517413, + "eval_overall_recall": 0.9371859296482412, + "eval_runtime": 0.2807, + "eval_samples_per_second": 605.543, + "eval_steps_per_second": 10.686, "step": 4704 }, { "epoch": 50.0, - "grad_norm": 1.0651848316192627, + "grad_norm": 1.6378672122955322, "learning_rate": 2.5e-05, - "loss": 0.0461, + "loss": 0.0425, "step": 4800 }, { "epoch": 50.0, - "eval_LOCATION_f1": 0.9224137931034483, + "eval_LOCATION_f1": 0.9276595744680851, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9224137931034483, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9177215189873418, + "eval_LOCATION_precision": 0.9159663865546218, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9056603773584907, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9177215189873418, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9, + "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03915992006659508, - "eval_overall_accuracy": 0.987098545155092, - "eval_overall_f1": 0.9410288582183186, - "eval_overall_precision": 0.9398496240601504, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6242, - "eval_samples_per_second": 272.362, - "eval_steps_per_second": 4.806, + "eval_loss": 0.036550432443618774, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9376558603491272, + "eval_overall_precision": 0.9306930693069307, + "eval_overall_recall": 0.9447236180904522, + "eval_runtime": 0.2824, + "eval_samples_per_second": 601.912, + "eval_steps_per_second": 10.622, "step": 4800 }, { "epoch": 51.0, - "grad_norm": 0.6373057961463928, + "grad_norm": 0.6017678380012512, "learning_rate": 2.45e-05, "loss": 0.0422, "step": 4896 }, { "epoch": 51.0, - "eval_LOCATION_f1": 0.9356223175965666, + "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9316239316239316, - "eval_LOCATION_recall": 0.9396551724137931, - "eval_ORGANIZATION_f1": 0.9177215189873418, + "eval_LOCATION_precision": 0.9152542372881356, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.910828025477707, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9177215189873418, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9166666666666666, + "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03710971772670746, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9448621553884713, - "eval_overall_precision": 0.9425, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6344, - "eval_samples_per_second": 267.952, - "eval_steps_per_second": 4.729, + "eval_loss": 0.036360133439302444, + "eval_overall_accuracy": 0.987098545155092, + "eval_overall_f1": 0.9385194479297366, + "eval_overall_precision": 0.9373433583959899, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.2813, + "eval_samples_per_second": 604.3, + "eval_steps_per_second": 10.664, "step": 4896 }, { "epoch": 52.0, - "grad_norm": 1.6656302213668823, + "grad_norm": 1.1220182180404663, "learning_rate": 2.4e-05, - "loss": 0.0419, + "loss": 0.0409, "step": 4992 }, { "epoch": 52.0, - "eval_LOCATION_f1": 0.927038626609442, + "eval_LOCATION_f1": 0.9184549356223175, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9230769230769231, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9153605015673981, - "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.906832298136646, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_LOCATION_precision": 0.9145299145299145, + "eval_LOCATION_recall": 0.9224137931034483, + "eval_ORGANIZATION_f1": 0.91875, + "eval_ORGANIZATION_number": 158, + "eval_ORGANIZATION_precision": 0.9074074074074074, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03852337226271629, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9413233458177278, - "eval_overall_precision": 0.9354838709677419, + "eval_loss": 0.03568459302186966, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9401496259351622, + "eval_overall_precision": 0.9331683168316832, "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6311, - "eval_samples_per_second": 269.377, - "eval_steps_per_second": 4.754, + "eval_runtime": 0.2809, + "eval_samples_per_second": 605.214, + "eval_steps_per_second": 10.68, "step": 4992 }, { "epoch": 53.0, - "grad_norm": 1.0709712505340576, + "grad_norm": 1.0612504482269287, "learning_rate": 2.35e-05, - "loss": 0.0403, + "loss": 0.0414, "step": 5088 }, { "epoch": 53.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9182389937106918, + "eval_LOCATION_precision": 0.9230769230769231, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9249999999999999, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9125, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9135802469135802, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03830442577600479, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9423558897243106, - "eval_overall_precision": 0.94, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6351, - "eval_samples_per_second": 267.692, - "eval_steps_per_second": 4.724, + "eval_loss": 0.03593689948320389, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9451371571072318, + "eval_overall_precision": 0.9381188118811881, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2864, + "eval_samples_per_second": 593.537, + "eval_steps_per_second": 10.474, "step": 5088 }, { "epoch": 54.0, - "grad_norm": 1.4942684173583984, + "grad_norm": 1.130049228668213, "learning_rate": 2.3000000000000003e-05, - "loss": 0.0392, + "loss": 0.0403, "step": 5184 }, { "epoch": 54.0, - "eval_LOCATION_f1": 0.9224137931034483, + "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9224137931034483, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9221183800623053, + "eval_LOCATION_precision": 0.9230769230769231, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9130434782608695, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9079754601226994, - "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_ORGANIZATION_precision": 0.8963414634146342, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.037072986364364624, - "eval_overall_accuracy": 0.9898435355476256, - "eval_overall_f1": 0.9426433915211971, - "eval_overall_precision": 0.9356435643564357, + "eval_loss": 0.03527674823999405, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9402985074626866, + "eval_overall_precision": 0.9310344827586207, "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.622, - "eval_samples_per_second": 273.303, - "eval_steps_per_second": 4.823, + "eval_runtime": 0.2806, + "eval_samples_per_second": 605.883, + "eval_steps_per_second": 10.692, "step": 5184 }, { "epoch": 55.0, - "grad_norm": 2.575190544128418, + "grad_norm": 1.4285669326782227, "learning_rate": 2.25e-05, - "loss": 0.0419, + "loss": 0.0393, "step": 5280 }, { "epoch": 55.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.9184549356223175, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9211356466876972, + "eval_LOCATION_precision": 0.9145299145299145, + "eval_LOCATION_recall": 0.9224137931034483, + "eval_ORGANIZATION_f1": 0.9249999999999999, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9182389937106918, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9135802469135802, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03656064346432686, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9448621553884713, - "eval_overall_precision": 0.9425, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6256, - "eval_samples_per_second": 271.758, - "eval_steps_per_second": 4.796, + "eval_loss": 0.03516939654946327, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9426433915211971, + "eval_overall_precision": 0.9356435643564357, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2817, + "eval_samples_per_second": 603.519, + "eval_steps_per_second": 10.65, "step": 5280 }, { "epoch": 56.0, - "grad_norm": 0.9610713720321655, + "grad_norm": 0.668100893497467, "learning_rate": 2.2000000000000003e-05, - "loss": 0.0391, + "loss": 0.0405, "step": 5376 }, { "epoch": 56.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9211356466876972, + "eval_LOCATION_precision": 0.923728813559322, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9430379746835443, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9182389937106918, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9430379746835443, + "eval_ORGANIZATION_recall": 0.9430379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03741344437003136, - "eval_overall_accuracy": 0.9887455393906122, - "eval_overall_f1": 0.9448621553884713, - "eval_overall_precision": 0.9425, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6345, - "eval_samples_per_second": 267.915, - "eval_steps_per_second": 4.728, + "eval_loss": 0.03589174896478653, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9536921151439299, + "eval_overall_precision": 0.9501246882793017, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2822, + "eval_samples_per_second": 602.42, + "eval_steps_per_second": 10.631, "step": 5376 }, { "epoch": 57.0, - "grad_norm": 0.37668493390083313, + "grad_norm": 0.7897509336471558, "learning_rate": 2.15e-05, - "loss": 0.0381, + "loss": 0.0404, "step": 5472 }, { "epoch": 57.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.9276595744680851, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9278996865203761, + "eval_LOCATION_precision": 0.9159663865546218, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9400630914826499, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9192546583850931, - "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_ORGANIZATION_precision": 0.9371069182389937, + "eval_ORGANIZATION_recall": 0.9430379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03578212484717369, - "eval_overall_accuracy": 0.9898435355476256, - "eval_overall_f1": 0.9475, - "eval_overall_precision": 0.9427860696517413, - "eval_overall_recall": 0.9522613065326633, - "eval_runtime": 0.6253, - "eval_samples_per_second": 271.877, - "eval_steps_per_second": 4.798, + "eval_loss": 0.037044160068035126, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.951310861423221, + "eval_overall_precision": 0.9454094292803971, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.2798, + "eval_samples_per_second": 607.511, + "eval_steps_per_second": 10.721, "step": 5472 }, { "epoch": 58.0, - "grad_norm": 0.46238428354263306, + "grad_norm": 0.9230952262878418, "learning_rate": 2.1e-05, - "loss": 0.0385, + "loss": 0.0398, "step": 5568 }, { "epoch": 58.0, - "eval_LOCATION_f1": 0.9396551724137931, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9396551724137931, + "eval_LOCATION_precision": 0.9316239316239316, "eval_LOCATION_recall": 0.9396551724137931, - "eval_ORGANIZATION_f1": 0.9299363057324842, + "eval_ORGANIZATION_f1": 0.9337539432176657, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9358974358974359, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9308176100628931, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.037841230630874634, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.950943396226415, - "eval_overall_precision": 0.9521410579345088, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.682, - "eval_samples_per_second": 249.282, - "eval_steps_per_second": 4.399, + "eval_loss": 0.035523511469364166, + "eval_overall_accuracy": 0.9903925336261323, + "eval_overall_f1": 0.951188986232791, + "eval_overall_precision": 0.9476309226932669, + "eval_overall_recall": 0.9547738693467337, + "eval_runtime": 0.282, + "eval_samples_per_second": 602.9, + "eval_steps_per_second": 10.639, "step": 5568 }, { "epoch": 59.0, - "grad_norm": 1.334747314453125, + "grad_norm": 0.6653354167938232, "learning_rate": 2.05e-05, - "loss": 0.036, + "loss": 0.0382, "step": 5664 }, { "epoch": 59.0, - "eval_LOCATION_f1": 0.9184549356223175, + "eval_LOCATION_f1": 0.9396551724137931, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9145299145299145, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9211356466876972, + "eval_LOCATION_precision": 0.9396551724137931, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9490445859872612, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9182389937106918, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9551282051282052, + "eval_ORGANIZATION_recall": 0.9430379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03871507570147514, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9411764705882354, - "eval_overall_precision": 0.9376558603491272, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.626, - "eval_samples_per_second": 271.565, - "eval_steps_per_second": 4.792, + "eval_loss": 0.03545346483588219, + "eval_overall_accuracy": 0.9903925336261323, + "eval_overall_f1": 0.9584905660377357, + "eval_overall_precision": 0.9596977329974811, + "eval_overall_recall": 0.957286432160804, + "eval_runtime": 0.285, + "eval_samples_per_second": 596.488, + "eval_steps_per_second": 10.526, "step": 5664 }, { "epoch": 60.0, - "grad_norm": 1.4887285232543945, + "grad_norm": 1.0160192251205444, "learning_rate": 2e-05, - "loss": 0.0401, + "loss": 0.0396, "step": 5760 }, { "epoch": 60.0, - "eval_LOCATION_f1": 0.9356223175965666, + "eval_LOCATION_f1": 0.9276595744680851, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_precision": 0.9159663865546218, "eval_LOCATION_recall": 0.9396551724137931, - "eval_ORGANIZATION_f1": 0.9211356466876972, + "eval_ORGANIZATION_f1": 0.9182389937106918, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9182389937106918, + "eval_ORGANIZATION_precision": 0.9125, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03643361106514931, - "eval_overall_accuracy": 0.9890200384298655, - "eval_overall_f1": 0.9461827284105131, - "eval_overall_precision": 0.942643391521197, + "eval_loss": 0.03439299017190933, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9426433915211971, + "eval_overall_precision": 0.9356435643564357, "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6277, - "eval_samples_per_second": 270.809, - "eval_steps_per_second": 4.779, + "eval_runtime": 0.2803, + "eval_samples_per_second": 606.413, + "eval_steps_per_second": 10.701, "step": 5760 }, { "epoch": 61.0, - "grad_norm": 0.551049530506134, + "grad_norm": 0.5373579263687134, "learning_rate": 1.9500000000000003e-05, - "loss": 0.0357, + "loss": 0.0362, "step": 5856 }, { "epoch": 61.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, + "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9206349206349206, + "eval_ORGANIZATION_f1": 0.9137380191693291, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9235668789808917, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9225806451612903, + "eval_ORGANIZATION_recall": 0.9050632911392406, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03617864474654198, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9447236180904522, - "eval_overall_precision": 0.9447236180904522, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6313, - "eval_samples_per_second": 269.281, - "eval_steps_per_second": 4.752, + "eval_loss": 0.03558684140443802, + "eval_overall_accuracy": 0.9879220422728521, + "eval_overall_f1": 0.9408805031446541, + "eval_overall_precision": 0.9420654911838791, + "eval_overall_recall": 0.9396984924623115, + "eval_runtime": 0.2814, + "eval_samples_per_second": 604.106, + "eval_steps_per_second": 10.661, "step": 5856 }, { "epoch": 62.0, - "grad_norm": 0.6859171986579895, + "grad_norm": 1.3705915212631226, "learning_rate": 1.9e-05, - "loss": 0.0342, + "loss": 0.037, "step": 5952 }, { @@ -1912,192 +1912,192 @@ "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.923728813559322, "eval_LOCATION_recall": 0.9396551724137931, - "eval_ORGANIZATION_f1": 0.9240506329113924, + "eval_ORGANIZATION_f1": 0.910828025477707, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9240506329113924, - "eval_ORGANIZATION_recall": 0.9240506329113924, - "eval_PERSON_f1": 0.9840000000000001, + "eval_ORGANIZATION_precision": 0.9166666666666666, + "eval_ORGANIZATION_recall": 0.9050632911392406, + "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, - "eval_PERSON_precision": 0.9761904761904762, + "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03858007490634918, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9450000000000001, - "eval_overall_precision": 0.9402985074626866, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.63, - "eval_samples_per_second": 269.861, - "eval_steps_per_second": 4.762, + "eval_loss": 0.03602343425154686, + "eval_overall_accuracy": 0.9881965413121054, + "eval_overall_f1": 0.9410288582183186, + "eval_overall_precision": 0.9398496240601504, + "eval_overall_recall": 0.9422110552763819, + "eval_runtime": 0.2805, + "eval_samples_per_second": 606.05, + "eval_steps_per_second": 10.695, "step": 5952 }, { "epoch": 63.0, - "grad_norm": 0.6219615340232849, + "grad_norm": 0.5670416355133057, "learning_rate": 1.85e-05, - "loss": 0.0363, + "loss": 0.0386, "step": 6048 }, { "epoch": 63.0, - "eval_LOCATION_f1": 0.927038626609442, + "eval_LOCATION_f1": 0.9310344827586207, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9230769230769231, + "eval_LOCATION_precision": 0.9310344827586207, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.930379746835443, + "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.930379746835443, - "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_ORGANIZATION_precision": 0.9367088607594937, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03788018971681595, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9473684210526316, - "eval_overall_precision": 0.945, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6257, - "eval_samples_per_second": 271.683, - "eval_steps_per_second": 4.794, + "eval_loss": 0.036378536373376846, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9510664993726474, + "eval_overall_precision": 0.949874686716792, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2812, + "eval_samples_per_second": 604.53, + "eval_steps_per_second": 10.668, "step": 6048 }, { "epoch": 64.0, - "grad_norm": 1.1038762331008911, + "grad_norm": 1.2524464130401611, "learning_rate": 1.8e-05, - "loss": 0.0355, + "loss": 0.0365, "step": 6144 }, { "epoch": 64.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, + "eval_LOCATION_precision": 0.9152542372881356, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9142857142857144, + "eval_ORGANIZATION_f1": 0.9260450160771704, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9171974522292994, + "eval_ORGANIZATION_precision": 0.9411764705882353, "eval_ORGANIZATION_recall": 0.9113924050632911, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.036534231156110764, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9422110552763819, - "eval_overall_precision": 0.9422110552763819, + "eval_loss": 0.036032553762197495, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9445843828715367, + "eval_overall_precision": 0.946969696969697, "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6276, - "eval_samples_per_second": 270.88, - "eval_steps_per_second": 4.78, + "eval_runtime": 0.282, + "eval_samples_per_second": 602.84, + "eval_steps_per_second": 10.638, "step": 6144 }, { "epoch": 65.0, - "grad_norm": 0.6325670480728149, + "grad_norm": 0.5007239580154419, "learning_rate": 1.75e-05, - "loss": 0.0353, + "loss": 0.0347, "step": 6240 }, { "epoch": 65.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9240506329113924, + "eval_LOCATION_precision": 0.923728813559322, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9294871794871796, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9240506329113924, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9415584415584416, + "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03730296343564987, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9447236180904522, - "eval_overall_precision": 0.9447236180904522, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6323, - "eval_samples_per_second": 268.873, - "eval_steps_per_second": 4.745, + "eval_loss": 0.035410117357969284, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9484276729559749, + "eval_overall_precision": 0.9496221662468514, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.2862, + "eval_samples_per_second": 594.083, + "eval_steps_per_second": 10.484, "step": 6240 }, { "epoch": 66.0, - "grad_norm": 0.7639645934104919, + "grad_norm": 0.7251582741737366, "learning_rate": 1.7000000000000003e-05, - "loss": 0.0343, + "loss": 0.0393, "step": 6336 }, { "epoch": 66.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9396551724137931, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9308176100628932, + "eval_LOCATION_precision": 0.9396551724137931, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.926517571884984, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.925, - "eval_ORGANIZATION_recall": 0.9367088607594937, + "eval_ORGANIZATION_precision": 0.9354838709677419, + "eval_ORGANIZATION_recall": 0.9177215189873418, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03759036585688591, + "eval_loss": 0.036589812487363815, "eval_overall_accuracy": 0.9887455393906122, - "eval_overall_f1": 0.9473684210526316, - "eval_overall_precision": 0.945, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6238, - "eval_samples_per_second": 272.526, - "eval_steps_per_second": 4.809, + "eval_overall_f1": 0.9496221662468514, + "eval_overall_precision": 0.952020202020202, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.2814, + "eval_samples_per_second": 604.074, + "eval_steps_per_second": 10.66, "step": 6336 }, { "epoch": 67.0, - "grad_norm": 1.125841736793518, + "grad_norm": 1.2316526174545288, "learning_rate": 1.65e-05, - "loss": 0.0347, + "loss": 0.0359, "step": 6432 }, { "epoch": 67.0, - "eval_LOCATION_f1": 0.927038626609442, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9230769230769231, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9211356466876972, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9240506329113924, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9182389937106918, + "eval_ORGANIZATION_precision": 0.9240506329113924, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03600994125008583, - "eval_overall_accuracy": 0.9890200384298655, - "eval_overall_f1": 0.9436795994993743, - "eval_overall_precision": 0.940149625935162, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6257, - "eval_samples_per_second": 271.681, - "eval_steps_per_second": 4.794, + "eval_loss": 0.034778717905282974, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9473684210526316, + "eval_overall_precision": 0.945, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2795, + "eval_samples_per_second": 608.215, + "eval_steps_per_second": 10.733, "step": 6432 }, { "epoch": 68.0, - "grad_norm": 1.1081820726394653, + "grad_norm": 0.5684135556221008, "learning_rate": 1.6000000000000003e-05, - "loss": 0.0339, + "loss": 0.0331, "step": 6528 }, { "epoch": 68.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, - "eval_LOCATION_recall": 0.9310344827586207, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9177215189873418, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9177215189873418, @@ -2106,83 +2106,83 @@ "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03698847442865372, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9435382685069008, - "eval_overall_precision": 0.9423558897243107, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6327, - "eval_samples_per_second": 268.707, - "eval_steps_per_second": 4.742, + "eval_loss": 0.03474760055541992, + "eval_overall_accuracy": 0.9890200384298655, + "eval_overall_f1": 0.9448621553884713, + "eval_overall_precision": 0.9425, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.2799, + "eval_samples_per_second": 607.468, + "eval_steps_per_second": 10.72, "step": 6528 }, { "epoch": 69.0, - "grad_norm": 0.5234565138816833, + "grad_norm": 0.7760136127471924, "learning_rate": 1.55e-05, - "loss": 0.0357, + "loss": 0.0344, "step": 6624 }, { "epoch": 69.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9142857142857144, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9333333333333332, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9171974522292994, - "eval_ORGANIZATION_recall": 0.9113924050632911, + "eval_ORGANIZATION_precision": 0.9363057324840764, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03598235175013542, - "eval_overall_accuracy": 0.9879220422728521, - "eval_overall_f1": 0.9408805031446541, - "eval_overall_precision": 0.9420654911838791, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6389, - "eval_samples_per_second": 266.081, - "eval_steps_per_second": 4.696, + "eval_loss": 0.03407077118754387, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.950943396226415, + "eval_overall_precision": 0.9521410579345088, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2833, + "eval_samples_per_second": 600.147, + "eval_steps_per_second": 10.591, "step": 6624 }, { "epoch": 70.0, - "grad_norm": 1.084986686706543, + "grad_norm": 1.5560885667800903, "learning_rate": 1.5e-05, - "loss": 0.0339, + "loss": 0.0349, "step": 6720 }, { "epoch": 70.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9396551724137931, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9240506329113924, + "eval_LOCATION_precision": 0.9396551724137931, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9240506329113924, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9426751592356688, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.035142358392477036, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9447236180904522, - "eval_overall_precision": 0.9447236180904522, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.628, - "eval_samples_per_second": 270.695, - "eval_steps_per_second": 4.777, + "eval_loss": 0.034501850605010986, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9547738693467337, + "eval_overall_precision": 0.9547738693467337, + "eval_overall_recall": 0.9547738693467337, + "eval_runtime": 0.28, + "eval_samples_per_second": 607.101, + "eval_steps_per_second": 10.714, "step": 6720 }, { "epoch": 71.0, - "grad_norm": 1.4800338745117188, + "grad_norm": 0.7581639885902405, "learning_rate": 1.45e-05, - "loss": 0.0354, + "loss": 0.0349, "step": 6816 }, { @@ -2191,29 +2191,29 @@ "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9310344827586207, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9299363057324842, + "eval_ORGANIZATION_f1": 0.926984126984127, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9358974358974359, + "eval_ORGANIZATION_precision": 0.9299363057324841, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03501693159341812, - "eval_overall_accuracy": 0.9887455393906122, - "eval_overall_f1": 0.9484276729559749, - "eval_overall_precision": 0.9496221662468514, + "eval_loss": 0.03540712222456932, + "eval_overall_accuracy": 0.9884710403513588, + "eval_overall_f1": 0.9472361809045227, + "eval_overall_precision": 0.9472361809045227, "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6364, - "eval_samples_per_second": 267.144, - "eval_steps_per_second": 4.714, + "eval_runtime": 0.2795, + "eval_samples_per_second": 608.239, + "eval_steps_per_second": 10.734, "step": 6816 }, { "epoch": 72.0, - "grad_norm": 0.46930524706840515, + "grad_norm": 0.30566951632499695, "learning_rate": 1.4000000000000001e-05, - "loss": 0.0345, + "loss": 0.0342, "step": 6912 }, { @@ -2222,91 +2222,91 @@ "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.923728813559322, "eval_LOCATION_recall": 0.9396551724137931, - "eval_ORGANIZATION_f1": 0.9211356466876972, + "eval_ORGANIZATION_f1": 0.926984126984127, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9182389937106918, + "eval_ORGANIZATION_precision": 0.9299363057324841, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.0342424102127552, - "eval_overall_accuracy": 0.9895690365083722, - "eval_overall_f1": 0.9450000000000001, - "eval_overall_precision": 0.9402985074626866, + "eval_loss": 0.034342918545007706, + "eval_overall_accuracy": 0.9887455393906122, + "eval_overall_f1": 0.9473684210526316, + "eval_overall_precision": 0.945, "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.626, - "eval_samples_per_second": 271.587, - "eval_steps_per_second": 4.793, + "eval_runtime": 0.2798, + "eval_samples_per_second": 607.48, + "eval_steps_per_second": 10.72, "step": 6912 }, { "epoch": 73.0, - "grad_norm": 1.7347519397735596, + "grad_norm": 1.0282853841781616, "learning_rate": 1.3500000000000001e-05, - "loss": 0.0346, + "loss": 0.0333, "step": 7008 }, { "epoch": 73.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9363057324840764, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9240506329113924, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9423076923076923, - "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_ORGANIZATION_precision": 0.9240506329113924, + "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03550710529088974, + "eval_loss": 0.03536517545580864, "eval_overall_accuracy": 0.9890200384298655, - "eval_overall_f1": 0.9496221662468514, - "eval_overall_precision": 0.952020202020202, + "eval_overall_f1": 0.9472361809045227, + "eval_overall_precision": 0.9472361809045227, "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6305, - "eval_samples_per_second": 269.612, - "eval_steps_per_second": 4.758, + "eval_runtime": 0.2817, + "eval_samples_per_second": 603.375, + "eval_steps_per_second": 10.648, "step": 7008 }, { "epoch": 74.0, - "grad_norm": 0.18666690587997437, + "grad_norm": 3.3504393100738525, "learning_rate": 1.3000000000000001e-05, - "loss": 0.0322, + "loss": 0.0332, "step": 7104 }, { "epoch": 74.0, - "eval_LOCATION_f1": 0.935064935064935, + "eval_LOCATION_f1": 0.927038626609442, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_precision": 0.9230769230769231, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.939297124600639, + "eval_ORGANIZATION_f1": 0.9240506329113924, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9483870967741935, - "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_ORGANIZATION_precision": 0.9240506329113924, + "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03586093708872795, - "eval_overall_accuracy": 0.9895690365083722, - "eval_overall_f1": 0.953341740226986, - "eval_overall_precision": 0.9569620253164557, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6297, - "eval_samples_per_second": 269.97, - "eval_steps_per_second": 4.764, + "eval_loss": 0.0346221849322319, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9448621553884713, + "eval_overall_precision": 0.9425, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.2789, + "eval_samples_per_second": 609.459, + "eval_steps_per_second": 10.755, "step": 7104 }, { "epoch": 75.0, - "grad_norm": 1.8562440872192383, + "grad_norm": 0.7059272527694702, "learning_rate": 1.25e-05, - "loss": 0.034, + "loss": 0.0346, "step": 7200 }, { @@ -2315,122 +2315,122 @@ "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9310344827586207, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9329073482428115, + "eval_ORGANIZATION_f1": 0.9274447949526813, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9419354838709677, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9245283018867925, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03533979505300522, - "eval_overall_accuracy": 0.9887455393906122, - "eval_overall_f1": 0.9496221662468514, - "eval_overall_precision": 0.952020202020202, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6258, - "eval_samples_per_second": 271.655, - "eval_steps_per_second": 4.794, + "eval_loss": 0.0342298299074173, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9473684210526316, + "eval_overall_precision": 0.945, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2836, + "eval_samples_per_second": 599.44, + "eval_steps_per_second": 10.578, "step": 7200 }, { "epoch": 76.0, - "grad_norm": 0.6034942865371704, + "grad_norm": 1.0325454473495483, "learning_rate": 1.2e-05, - "loss": 0.0327, + "loss": 0.0334, "step": 7296 }, { "epoch": 76.0, - "eval_LOCATION_f1": 0.935064935064935, + "eval_LOCATION_f1": 0.9224137931034483, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9391304347826087, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9363057324840764, + "eval_LOCATION_precision": 0.9224137931034483, + "eval_LOCATION_recall": 0.9224137931034483, + "eval_ORGANIZATION_f1": 0.9308176100628932, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9423076923076923, - "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_ORGANIZATION_precision": 0.925, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03656287491321564, - "eval_overall_accuracy": 0.9890200384298655, - "eval_overall_f1": 0.9521410579345088, - "eval_overall_precision": 0.9545454545454546, + "eval_loss": 0.0346398688852787, + "eval_overall_accuracy": 0.9903925336261323, + "eval_overall_f1": 0.9461827284105131, + "eval_overall_precision": 0.942643391521197, "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6291, - "eval_samples_per_second": 270.237, - "eval_steps_per_second": 4.769, + "eval_runtime": 0.2805, + "eval_samples_per_second": 605.977, + "eval_steps_per_second": 10.694, "step": 7296 }, { "epoch": 77.0, - "grad_norm": 0.7069413661956787, + "grad_norm": 0.531468391418457, "learning_rate": 1.1500000000000002e-05, - "loss": 0.032, + "loss": 0.034, "step": 7392 }, { "epoch": 77.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9396551724137931, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9240506329113924, + "eval_LOCATION_precision": 0.9396551724137931, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.926984126984127, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9299363057324841, "eval_ORGANIZATION_recall": 0.9240506329113924, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03621837496757507, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9447236180904522, - "eval_overall_precision": 0.9447236180904522, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6257, - "eval_samples_per_second": 271.697, - "eval_steps_per_second": 4.795, + "eval_loss": 0.03495112434029579, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.949748743718593, + "eval_overall_precision": 0.949748743718593, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2841, + "eval_samples_per_second": 598.465, + "eval_steps_per_second": 10.561, "step": 7392 }, { "epoch": 78.0, - "grad_norm": 0.9935917854309082, + "grad_norm": 0.8816949129104614, "learning_rate": 1.1000000000000001e-05, - "loss": 0.0312, + "loss": 0.0341, "step": 7488 }, { "epoch": 78.0, - "eval_LOCATION_f1": 0.9224137931034483, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9224137931034483, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9201277955271565, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9333333333333332, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9290322580645162, - "eval_ORGANIZATION_recall": 0.9113924050632911, + "eval_ORGANIZATION_precision": 0.9363057324840764, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03663404658436775, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9420654911838791, - "eval_overall_precision": 0.9444444444444444, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6304, - "eval_samples_per_second": 269.69, - "eval_steps_per_second": 4.759, + "eval_loss": 0.034044817090034485, + "eval_overall_accuracy": 0.9903925336261323, + "eval_overall_f1": 0.9510664993726474, + "eval_overall_precision": 0.949874686716792, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2793, + "eval_samples_per_second": 608.583, + "eval_steps_per_second": 10.74, "step": 7488 }, { "epoch": 79.0, - "grad_norm": 0.24092857539653778, + "grad_norm": 0.5149128437042236, "learning_rate": 1.05e-05, - "loss": 0.03, + "loss": 0.033, "step": 7584 }, { @@ -2439,60 +2439,60 @@ "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9304347826086956, "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9240506329113924, + "eval_ORGANIZATION_f1": 0.9308176100628932, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9240506329113924, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.925, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03647984191775322, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9447236180904522, - "eval_overall_precision": 0.9447236180904522, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6175, - "eval_samples_per_second": 275.317, - "eval_steps_per_second": 4.859, + "eval_loss": 0.034786492586135864, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9473684210526316, + "eval_overall_precision": 0.945, + "eval_overall_recall": 0.949748743718593, + "eval_runtime": 0.2819, + "eval_samples_per_second": 602.992, + "eval_steps_per_second": 10.641, "step": 7584 }, { "epoch": 80.0, - "grad_norm": 0.6495091915130615, + "grad_norm": 1.0311975479125977, "learning_rate": 1e-05, - "loss": 0.0317, + "loss": 0.0308, "step": 7680 }, { "epoch": 80.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.9137931034482759, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9363057324840764, + "eval_LOCATION_precision": 0.9137931034482759, + "eval_LOCATION_recall": 0.9137931034482759, + "eval_ORGANIZATION_f1": 0.9278996865203761, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9423076923076923, - "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_ORGANIZATION_precision": 0.9192546583850931, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.037438035011291504, - "eval_overall_accuracy": 0.9887455393906122, - "eval_overall_f1": 0.950943396226415, - "eval_overall_precision": 0.9521410579345088, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6252, - "eval_samples_per_second": 271.933, - "eval_steps_per_second": 4.799, + "eval_loss": 0.0336722694337368, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9425, + "eval_overall_precision": 0.9378109452736318, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.2809, + "eval_samples_per_second": 605.269, + "eval_steps_per_second": 10.681, "step": 7680 }, { "epoch": 81.0, - "grad_norm": 0.7272472381591797, + "grad_norm": 0.3829832971096039, "learning_rate": 9.5e-06, - "loss": 0.0324, + "loss": 0.031, "step": 7776 }, { @@ -2501,37 +2501,37 @@ "eval_LOCATION_number": 116, "eval_LOCATION_precision": 0.9224137931034483, "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9337539432176657, + "eval_ORGANIZATION_f1": 0.9278996865203761, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9308176100628931, + "eval_ORGANIZATION_precision": 0.9192546583850931, "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03790738806128502, - "eval_overall_accuracy": 0.9887455393906122, - "eval_overall_f1": 0.9473684210526316, - "eval_overall_precision": 0.945, + "eval_loss": 0.03411416336894035, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9450000000000001, + "eval_overall_precision": 0.9402985074626866, "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.7303, - "eval_samples_per_second": 232.793, - "eval_steps_per_second": 4.108, + "eval_runtime": 0.2823, + "eval_samples_per_second": 602.183, + "eval_steps_per_second": 10.627, "step": 7776 }, { "epoch": 82.0, - "grad_norm": 0.4394896626472473, + "grad_norm": 0.5665716528892517, "learning_rate": 9e-06, - "loss": 0.0309, + "loss": 0.0315, "step": 7872 }, { "epoch": 82.0, - "eval_LOCATION_f1": 0.9224137931034483, + "eval_LOCATION_f1": 0.9316239316239315, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9224137931034483, - "eval_LOCATION_recall": 0.9224137931034483, + "eval_LOCATION_precision": 0.923728813559322, + "eval_LOCATION_recall": 0.9396551724137931, "eval_ORGANIZATION_f1": 0.9333333333333332, "eval_ORGANIZATION_number": 158, "eval_ORGANIZATION_precision": 0.9363057324840764, @@ -2540,582 +2540,582 @@ "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.037485770881175995, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9472361809045227, - "eval_overall_precision": 0.9472361809045227, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.63, - "eval_samples_per_second": 269.821, - "eval_steps_per_second": 4.762, + "eval_loss": 0.03403652459383011, + "eval_overall_accuracy": 0.9903925336261323, + "eval_overall_f1": 0.9498746867167919, + "eval_overall_precision": 0.9475, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2825, + "eval_samples_per_second": 601.815, + "eval_steps_per_second": 10.62, "step": 7872 }, { "epoch": 83.0, - "grad_norm": 1.6514469385147095, + "grad_norm": 0.9708644151687622, "learning_rate": 8.500000000000002e-06, - "loss": 0.0333, + "loss": 0.0321, "step": 7968 }, { "epoch": 83.0, - "eval_LOCATION_f1": 0.9177489177489179, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9217391304347826, - "eval_LOCATION_recall": 0.9137931034482759, - "eval_ORGANIZATION_f1": 0.9235668789808917, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9294871794871795, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9367088607594937, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03762008622288704, - "eval_overall_accuracy": 0.9879220422728521, - "eval_overall_f1": 0.9420654911838791, - "eval_overall_precision": 0.9444444444444444, - "eval_overall_recall": 0.9396984924623115, - "eval_runtime": 0.6209, - "eval_samples_per_second": 273.793, - "eval_steps_per_second": 4.832, + "eval_loss": 0.03434957191348076, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9522613065326633, + "eval_overall_precision": 0.9522613065326633, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2812, + "eval_samples_per_second": 604.564, + "eval_steps_per_second": 10.669, "step": 7968 }, { "epoch": 84.0, - "grad_norm": 1.4008151292800903, + "grad_norm": 1.0318374633789062, "learning_rate": 8.000000000000001e-06, - "loss": 0.0331, + "loss": 0.0317, "step": 8064 }, { "epoch": 84.0, - "eval_LOCATION_f1": 0.927038626609442, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9230769230769231, + "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9333333333333332, + "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9363057324840764, - "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_ORGANIZATION_precision": 0.9367088607594937, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.037528783082962036, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9485570890840652, - "eval_overall_precision": 0.9473684210526315, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6239, - "eval_samples_per_second": 272.485, - "eval_steps_per_second": 4.809, + "eval_loss": 0.03397549316287041, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9522613065326633, + "eval_overall_precision": 0.9522613065326633, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2802, + "eval_samples_per_second": 606.677, + "eval_steps_per_second": 10.706, "step": 8064 }, { "epoch": 85.0, - "grad_norm": 0.8886189460754395, + "grad_norm": 1.0523338317871094, "learning_rate": 7.5e-06, - "loss": 0.0313, + "loss": 0.0324, "step": 8160 }, { "epoch": 85.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9184549356223175, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, + "eval_LOCATION_precision": 0.9145299145299145, "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.926984126984127, + "eval_ORGANIZATION_f1": 0.9245283018867925, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9299363057324841, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.91875, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03677176684141159, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9459119496855346, - "eval_overall_precision": 0.947103274559194, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6275, - "eval_samples_per_second": 270.908, - "eval_steps_per_second": 4.781, + "eval_loss": 0.033996351063251495, + "eval_overall_accuracy": 0.9892945374691189, + "eval_overall_f1": 0.9425, + "eval_overall_precision": 0.9378109452736318, + "eval_overall_recall": 0.9472361809045227, + "eval_runtime": 0.2808, + "eval_samples_per_second": 605.465, + "eval_steps_per_second": 10.685, "step": 8160 }, { "epoch": 86.0, - "grad_norm": 0.4647173285484314, + "grad_norm": 0.45407137274742126, "learning_rate": 7.000000000000001e-06, - "loss": 0.031, + "loss": 0.0317, "step": 8256 }, { "epoch": 86.0, - "eval_LOCATION_f1": 0.927038626609442, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9230769230769231, - "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9333333333333332, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9363057324840764, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9363057324840764, + "eval_ORGANIZATION_precision": 0.9423076923076923, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.0371999628841877, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9485570890840652, - "eval_overall_precision": 0.9473684210526315, - "eval_overall_recall": 0.949748743718593, - "eval_runtime": 0.6241, - "eval_samples_per_second": 272.404, - "eval_steps_per_second": 4.807, + "eval_loss": 0.03390591964125633, + "eval_overall_accuracy": 0.990118034586879, + "eval_overall_f1": 0.9522613065326633, + "eval_overall_precision": 0.9522613065326633, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2806, + "eval_samples_per_second": 605.825, + "eval_steps_per_second": 10.691, "step": 8256 }, { "epoch": 87.0, - "grad_norm": 0.5002410411834717, + "grad_norm": 0.26913008093833923, "learning_rate": 6.5000000000000004e-06, - "loss": 0.0325, + "loss": 0.0308, "step": 8352 }, { "epoch": 87.0, - "eval_LOCATION_f1": 0.9224137931034483, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9224137931034483, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9333333333333332, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.9363057324840764, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9363057324840764, + "eval_ORGANIZATION_precision": 0.9423076923076923, "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03664541244506836, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9472361809045227, - "eval_overall_precision": 0.9472361809045227, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6476, - "eval_samples_per_second": 262.503, - "eval_steps_per_second": 4.632, + "eval_loss": 0.03465632349252701, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9522613065326633, + "eval_overall_precision": 0.9522613065326633, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.28, + "eval_samples_per_second": 607.249, + "eval_steps_per_second": 10.716, "step": 8352 }, { "epoch": 88.0, - "grad_norm": 1.6027255058288574, + "grad_norm": 0.30417150259017944, "learning_rate": 6e-06, - "loss": 0.0309, + "loss": 0.0311, "step": 8448 }, { "epoch": 88.0, - "eval_LOCATION_f1": 0.9224137931034483, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9224137931034483, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9333333333333332, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9363057324840764, - "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_ORGANIZATION_precision": 0.9367088607594937, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.0366690568625927, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9472361809045227, - "eval_overall_precision": 0.9472361809045227, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6246, - "eval_samples_per_second": 272.173, - "eval_steps_per_second": 4.803, + "eval_loss": 0.034424372017383575, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9522613065326633, + "eval_overall_precision": 0.9522613065326633, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2814, + "eval_samples_per_second": 604.076, + "eval_steps_per_second": 10.66, "step": 8448 }, { "epoch": 89.0, - "grad_norm": 1.264343023300171, + "grad_norm": 0.7772185206413269, "learning_rate": 5.500000000000001e-06, - "loss": 0.0298, + "loss": 0.0295, "step": 8544 }, { "epoch": 89.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9235668789808917, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9294871794871795, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9426751592356688, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03642358258366585, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9445843828715367, - "eval_overall_precision": 0.946969696969697, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6228, - "eval_samples_per_second": 272.971, - "eval_steps_per_second": 4.817, + "eval_loss": 0.034607380628585815, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2806, + "eval_samples_per_second": 605.778, + "eval_steps_per_second": 10.69, "step": 8544 }, { "epoch": 90.0, - "grad_norm": 1.2060728073120117, + "grad_norm": 0.6779617071151733, "learning_rate": 5e-06, - "loss": 0.03, + "loss": 0.0304, "step": 8640 }, { "epoch": 90.0, - "eval_LOCATION_f1": 0.9310344827586207, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9310344827586207, + "eval_LOCATION_precision": 0.9391304347826087, "eval_LOCATION_recall": 0.9310344827586207, - "eval_ORGANIZATION_f1": 0.9235668789808917, + "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9294871794871795, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9426751592356688, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03646455332636833, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9459119496855346, - "eval_overall_precision": 0.947103274559194, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.623, - "eval_samples_per_second": 272.869, - "eval_steps_per_second": 4.815, + "eval_loss": 0.03426329791545868, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2819, + "eval_samples_per_second": 603.089, + "eval_steps_per_second": 10.643, "step": 8640 }, { "epoch": 91.0, - "grad_norm": 1.7125660181045532, + "grad_norm": 1.0395246744155884, "learning_rate": 4.5e-06, - "loss": 0.0317, + "loss": 0.0315, "step": 8736 }, { "epoch": 91.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9206349206349206, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9235668789808917, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9426751592356688, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03602577745914459, - "eval_overall_accuracy": 0.9879220422728521, - "eval_overall_f1": 0.9433962264150944, - "eval_overall_precision": 0.9445843828715366, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6244, - "eval_samples_per_second": 272.252, - "eval_steps_per_second": 4.804, + "eval_loss": 0.03426933288574219, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2786, + "eval_samples_per_second": 610.112, + "eval_steps_per_second": 10.767, "step": 8736 }, { "epoch": 92.0, - "grad_norm": 2.947300434112549, + "grad_norm": 0.45442330837249756, "learning_rate": 4.000000000000001e-06, - "loss": 0.0308, + "loss": 0.0314, "step": 8832 }, { "epoch": 92.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9235668789808917, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9294871794871795, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9426751592356688, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03611500561237335, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9445843828715367, - "eval_overall_precision": 0.946969696969697, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6314, - "eval_samples_per_second": 269.227, - "eval_steps_per_second": 4.751, + "eval_loss": 0.03421960771083832, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.282, + "eval_samples_per_second": 602.845, + "eval_steps_per_second": 10.638, "step": 8832 }, { "epoch": 93.0, - "grad_norm": 0.5695700645446777, + "grad_norm": 0.4602469801902771, "learning_rate": 3.5000000000000004e-06, - "loss": 0.0282, + "loss": 0.0322, "step": 8928 }, { "epoch": 93.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9235668789808917, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9294871794871795, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9426751592356688, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.035832908004522324, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9445843828715367, - "eval_overall_precision": 0.946969696969697, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6207, - "eval_samples_per_second": 273.9, - "eval_steps_per_second": 4.834, + "eval_loss": 0.03397959843277931, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2823, + "eval_samples_per_second": 602.296, + "eval_steps_per_second": 10.629, "step": 8928 }, { "epoch": 94.0, - "grad_norm": 1.4625869989395142, + "grad_norm": 0.46450167894363403, "learning_rate": 3e-06, - "loss": 0.03, + "loss": 0.0303, "step": 9024 }, { "epoch": 94.0, - "eval_LOCATION_f1": 0.9224137931034483, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9224137931034483, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9333333333333332, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9363057324840764, - "eval_ORGANIZATION_recall": 0.930379746835443, + "eval_ORGANIZATION_precision": 0.9367088607594937, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03618810325860977, - "eval_overall_accuracy": 0.9887455393906122, - "eval_overall_f1": 0.9472361809045227, - "eval_overall_precision": 0.9472361809045227, - "eval_overall_recall": 0.9472361809045227, - "eval_runtime": 0.6205, - "eval_samples_per_second": 273.954, - "eval_steps_per_second": 4.834, + "eval_loss": 0.03426538035273552, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9522613065326633, + "eval_overall_precision": 0.9522613065326633, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.281, + "eval_samples_per_second": 604.875, + "eval_steps_per_second": 10.674, "step": 9024 }, { "epoch": 95.0, - "grad_norm": 0.5287604331970215, + "grad_norm": 0.763189971446991, "learning_rate": 2.5e-06, - "loss": 0.0318, + "loss": 0.0316, "step": 9120 }, { "epoch": 95.0, - "eval_LOCATION_f1": 0.9224137931034483, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9224137931034483, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9240506329113924, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9367088607594937, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9240506329113924, - "eval_ORGANIZATION_recall": 0.9240506329113924, + "eval_ORGANIZATION_precision": 0.9367088607594937, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.036259282380342484, - "eval_overall_accuracy": 0.9884710403513588, - "eval_overall_f1": 0.9435382685069008, - "eval_overall_precision": 0.9423558897243107, - "eval_overall_recall": 0.9447236180904522, - "eval_runtime": 0.6284, - "eval_samples_per_second": 270.527, - "eval_steps_per_second": 4.774, + "eval_loss": 0.034257952123880386, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9522613065326633, + "eval_overall_precision": 0.9522613065326633, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2819, + "eval_samples_per_second": 603.132, + "eval_steps_per_second": 10.644, "step": 9120 }, { "epoch": 96.0, - "grad_norm": 0.48243898153305054, + "grad_norm": 0.3942119777202606, "learning_rate": 2.0000000000000003e-06, - "loss": 0.0306, + "loss": 0.0317, "step": 9216 }, { "epoch": 96.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.935064935064935, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9235668789808917, + "eval_LOCATION_precision": 0.9391304347826087, + "eval_LOCATION_recall": 0.9310344827586207, + "eval_ORGANIZATION_f1": 0.9396825396825397, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9294871794871795, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9426751592356688, + "eval_ORGANIZATION_recall": 0.9367088607594937, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03604614734649658, - "eval_overall_accuracy": 0.9881965413121054, - "eval_overall_f1": 0.9445843828715367, - "eval_overall_precision": 0.946969696969697, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6296, - "eval_samples_per_second": 270.033, - "eval_steps_per_second": 4.765, + "eval_loss": 0.034202940762043, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2798, + "eval_samples_per_second": 607.549, + "eval_steps_per_second": 10.721, "step": 9216 }, { "epoch": 97.0, - "grad_norm": 0.07403044402599335, + "grad_norm": 0.13562001287937164, "learning_rate": 1.5e-06, - "loss": 0.0304, + "loss": 0.0321, "step": 9312 }, { "epoch": 97.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9206349206349206, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.939297124600639, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9235668789808917, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9483870967741935, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.0361478365957737, - "eval_overall_accuracy": 0.9879220422728521, - "eval_overall_f1": 0.9433962264150944, - "eval_overall_precision": 0.9445843828715366, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6346, - "eval_samples_per_second": 267.904, - "eval_steps_per_second": 4.728, + "eval_loss": 0.03407964110374451, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2822, + "eval_samples_per_second": 602.4, + "eval_steps_per_second": 10.631, "step": 9312 }, { "epoch": 98.0, - "grad_norm": 1.3072566986083984, + "grad_norm": 1.3050485849380493, "learning_rate": 1.0000000000000002e-06, - "loss": 0.03, + "loss": 0.0295, "step": 9408 }, { "epoch": 98.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9206349206349206, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.939297124600639, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9235668789808917, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9483870967741935, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.036295101046562195, - "eval_overall_accuracy": 0.9879220422728521, - "eval_overall_f1": 0.9433962264150944, - "eval_overall_precision": 0.9445843828715366, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6248, - "eval_samples_per_second": 272.07, - "eval_steps_per_second": 4.801, + "eval_loss": 0.03420589491724968, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.28, + "eval_samples_per_second": 607.192, + "eval_steps_per_second": 10.715, "step": 9408 }, { "epoch": 99.0, - "grad_norm": 0.8703434467315674, + "grad_norm": 0.41996899247169495, "learning_rate": 5.000000000000001e-07, - "loss": 0.0293, + "loss": 0.031, "step": 9504 }, { "epoch": 99.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9206349206349206, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.939297124600639, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9235668789808917, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9483870967741935, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.036201201379299164, - "eval_overall_accuracy": 0.9879220422728521, - "eval_overall_f1": 0.9433962264150944, - "eval_overall_precision": 0.9445843828715366, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6278, - "eval_samples_per_second": 270.799, - "eval_steps_per_second": 4.779, + "eval_loss": 0.0341360941529274, + "eval_overall_accuracy": 0.9898435355476256, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.282, + "eval_samples_per_second": 602.821, + "eval_steps_per_second": 10.638, "step": 9504 }, { "epoch": 100.0, - "grad_norm": 0.9420938491821289, + "grad_norm": 0.4611692428588867, "learning_rate": 0.0, - "loss": 0.0303, + "loss": 0.0299, "step": 9600 }, { "epoch": 100.0, - "eval_LOCATION_f1": 0.9264069264069263, + "eval_LOCATION_f1": 0.9356223175965666, "eval_LOCATION_number": 116, - "eval_LOCATION_precision": 0.9304347826086956, - "eval_LOCATION_recall": 0.9224137931034483, - "eval_ORGANIZATION_f1": 0.9206349206349206, + "eval_LOCATION_precision": 0.9316239316239316, + "eval_LOCATION_recall": 0.9396551724137931, + "eval_ORGANIZATION_f1": 0.939297124600639, "eval_ORGANIZATION_number": 158, - "eval_ORGANIZATION_precision": 0.9235668789808917, - "eval_ORGANIZATION_recall": 0.9177215189873418, + "eval_ORGANIZATION_precision": 0.9483870967741935, + "eval_ORGANIZATION_recall": 0.930379746835443, "eval_PERSON_f1": 0.9879518072289156, "eval_PERSON_number": 124, "eval_PERSON_precision": 0.984, "eval_PERSON_recall": 0.9919354838709677, - "eval_loss": 0.03618453070521355, - "eval_overall_accuracy": 0.9879220422728521, - "eval_overall_f1": 0.9433962264150944, - "eval_overall_precision": 0.9445843828715366, - "eval_overall_recall": 0.9422110552763819, - "eval_runtime": 0.6372, - "eval_samples_per_second": 266.78, - "eval_steps_per_second": 4.708, + "eval_loss": 0.0341842956840992, + "eval_overall_accuracy": 0.9895690365083722, + "eval_overall_f1": 0.9534591194968555, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9522613065326633, + "eval_runtime": 0.2812, + "eval_samples_per_second": 604.608, + "eval_steps_per_second": 10.67, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3874712599394304.0, - "train_loss": 0.0784850787371397, - "train_runtime": 1037.5835, - "train_samples_per_second": 147.554, - "train_steps_per_second": 9.252 + "train_loss": 0.07820479728281499, + "train_runtime": 511.6876, + "train_samples_per_second": 299.206, + "train_steps_per_second": 18.761 } ], "logging_steps": 500,