diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3930 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 10600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 2.031937599182129, + "learning_rate": 4.9500000000000004e-05, + "loss": 1.1728, + "step": 106 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.0, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.0, + "eval_PERSON_recall": 0.0, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.0, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.0, + "eval_TIME_recall": 0.0, + "eval_loss": 0.6954261064529419, + "eval_overall_accuracy": 0.8409311348205626, + "eval_overall_f1": 0.0, + "eval_overall_precision": 0.0, + "eval_overall_recall": 0.0, + "eval_runtime": 0.3378, + "eval_samples_per_second": 553.573, + "eval_steps_per_second": 8.881, + "step": 106 + }, + { + "epoch": 2.0, + "grad_norm": 0.9953563213348389, + "learning_rate": 4.9e-05, + "loss": 0.6741, + "step": 212 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.0, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.0, + "eval_LOCATION_recall": 0.0, + "eval_ORGANIZATION_f1": 0.0, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.0, + "eval_ORGANIZATION_recall": 0.0, + "eval_PERSON_f1": 0.0375, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.3, + "eval_PERSON_recall": 0.02, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.0, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.0, + "eval_TIME_recall": 0.0, + "eval_loss": 0.562364399433136, + "eval_overall_accuracy": 0.8433559650824443, + "eval_overall_f1": 0.016574585635359115, + "eval_overall_precision": 0.2, + "eval_overall_recall": 0.008645533141210375, + "eval_runtime": 0.3297, + "eval_samples_per_second": 567.242, + "eval_steps_per_second": 9.1, + "step": 212 + }, + { + "epoch": 3.0, + "grad_norm": 0.8606458306312561, + "learning_rate": 4.85e-05, + "loss": 0.5465, + "step": 318 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.09302325581395349, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.26666666666666666, + "eval_LOCATION_recall": 0.056338028169014086, + "eval_ORGANIZATION_f1": 0.024390243902439025, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.058823529411764705, + "eval_ORGANIZATION_recall": 0.015384615384615385, + "eval_PERSON_f1": 0.34220532319391633, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.39823008849557523, + "eval_PERSON_recall": 0.3, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.391304347826087, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.5, + "eval_TIME_recall": 0.32142857142857145, + "eval_loss": 0.4473896920681, + "eval_overall_accuracy": 0.873666343355965, + "eval_overall_f1": 0.23001949317738793, + "eval_overall_precision": 0.35542168674698793, + "eval_overall_recall": 0.17002881844380405, + "eval_runtime": 0.3314, + "eval_samples_per_second": 564.2, + "eval_steps_per_second": 9.051, + "step": 318 + }, + { + "epoch": 4.0, + "grad_norm": 1.493430256843567, + "learning_rate": 4.8e-05, + "loss": 0.4542, + "step": 424 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.3716814159292035, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.5, + "eval_LOCATION_recall": 0.29577464788732394, + "eval_ORGANIZATION_f1": 0.1553398058252427, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.21052631578947367, + "eval_ORGANIZATION_recall": 0.12307692307692308, + "eval_PERSON_f1": 0.5045592705167173, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.46368715083798884, + "eval_PERSON_recall": 0.5533333333333333, + "eval_QUANTITY_f1": 0.0, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.0, + "eval_QUANTITY_recall": 0.0, + "eval_TIME_f1": 0.4799999999999999, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.5454545454545454, + "eval_TIME_recall": 0.42857142857142855, + "eval_loss": 0.37795889377593994, + "eval_overall_accuracy": 0.8930649854510184, + "eval_overall_f1": 0.3887147335423197, + "eval_overall_precision": 0.4261168384879725, + "eval_overall_recall": 0.3573487031700288, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.603, + "eval_steps_per_second": 9.026, + "step": 424 + }, + { + "epoch": 5.0, + "grad_norm": 1.0484815835952759, + "learning_rate": 4.75e-05, + "loss": 0.3965, + "step": 530 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.496551724137931, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.4864864864864865, + "eval_LOCATION_recall": 0.5070422535211268, + "eval_ORGANIZATION_f1": 0.3703703703703704, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.35714285714285715, + "eval_ORGANIZATION_recall": 0.38461538461538464, + "eval_PERSON_f1": 0.601123595505618, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.5194174757281553, + "eval_PERSON_recall": 0.7133333333333334, + "eval_QUANTITY_f1": 0.16666666666666666, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.18518518518518517, + "eval_QUANTITY_recall": 0.15151515151515152, + "eval_TIME_f1": 0.6071428571428571, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.6071428571428571, + "eval_TIME_recall": 0.6071428571428571, + "eval_loss": 0.3348589539527893, + "eval_overall_accuracy": 0.9119786614936954, + "eval_overall_f1": 0.5053191489361702, + "eval_overall_precision": 0.4691358024691358, + "eval_overall_recall": 0.547550432276657, + "eval_runtime": 0.3282, + "eval_samples_per_second": 569.692, + "eval_steps_per_second": 9.139, + "step": 530 + }, + { + "epoch": 6.0, + "grad_norm": 1.370116114616394, + "learning_rate": 4.7e-05, + "loss": 0.3563, + "step": 636 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.52, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.4936708860759494, + "eval_LOCATION_recall": 0.5492957746478874, + "eval_ORGANIZATION_f1": 0.4142857142857143, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.38666666666666666, + "eval_ORGANIZATION_recall": 0.4461538461538462, + "eval_PERSON_f1": 0.6666666666666666, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.5648148148148148, + "eval_PERSON_recall": 0.8133333333333334, + "eval_QUANTITY_f1": 0.368421052631579, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.32558139534883723, + "eval_QUANTITY_recall": 0.42424242424242425, + "eval_TIME_f1": 0.6315789473684211, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.6206896551724138, + "eval_TIME_recall": 0.6428571428571429, + "eval_loss": 0.3059276044368744, + "eval_overall_accuracy": 0.9197381183317168, + "eval_overall_f1": 0.5627376425855513, + "eval_overall_precision": 0.502262443438914, + "eval_overall_recall": 0.6397694524495677, + "eval_runtime": 0.3281, + "eval_samples_per_second": 569.928, + "eval_steps_per_second": 9.143, + "step": 636 + }, + { + "epoch": 7.0, + "grad_norm": 3.2192013263702393, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.3237, + "step": 742 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.5625, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.5056179775280899, + "eval_LOCATION_recall": 0.6338028169014085, + "eval_ORGANIZATION_f1": 0.5040650406504066, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5344827586206896, + "eval_ORGANIZATION_recall": 0.47692307692307695, + "eval_PERSON_f1": 0.725212464589235, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.6305418719211823, + "eval_PERSON_recall": 0.8533333333333334, + "eval_QUANTITY_f1": 0.43373493975903615, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.36, + "eval_QUANTITY_recall": 0.5454545454545454, + "eval_TIME_f1": 0.75, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.75, + "eval_loss": 0.26966217160224915, + "eval_overall_accuracy": 0.9277400581959263, + "eval_overall_f1": 0.6270967741935485, + "eval_overall_precision": 0.5677570093457944, + "eval_overall_recall": 0.7002881844380403, + "eval_runtime": 0.334, + "eval_samples_per_second": 559.834, + "eval_steps_per_second": 8.981, + "step": 742 + }, + { + "epoch": 8.0, + "grad_norm": 0.8027381896972656, + "learning_rate": 4.600000000000001e-05, + "loss": 0.2867, + "step": 848 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.6, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.5393258426966292, + "eval_LOCATION_recall": 0.676056338028169, + "eval_ORGANIZATION_f1": 0.5035971223021583, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.47297297297297297, + "eval_ORGANIZATION_recall": 0.5384615384615384, + "eval_PERSON_f1": 0.7624633431085044, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.680628272251309, + "eval_PERSON_recall": 0.8666666666666667, + "eval_QUANTITY_f1": 0.4556962025316456, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.391304347826087, + "eval_QUANTITY_recall": 0.5454545454545454, + "eval_TIME_f1": 0.8518518518518519, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8846153846153846, + "eval_TIME_recall": 0.8214285714285714, + "eval_loss": 0.24133449792861938, + "eval_overall_accuracy": 0.9311348205625606, + "eval_overall_f1": 0.6571798188874515, + "eval_overall_precision": 0.596244131455399, + "eval_overall_recall": 0.7319884726224783, + "eval_runtime": 0.3343, + "eval_samples_per_second": 559.301, + "eval_steps_per_second": 8.973, + "step": 848 + }, + { + "epoch": 9.0, + "grad_norm": 3.7063872814178467, + "learning_rate": 4.55e-05, + "loss": 0.2596, + "step": 954 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.5889570552147239, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.5217391304347826, + "eval_LOCATION_recall": 0.676056338028169, + "eval_ORGANIZATION_f1": 0.5, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.4788732394366197, + "eval_ORGANIZATION_recall": 0.5230769230769231, + "eval_PERSON_f1": 0.7678571428571428, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.6935483870967742, + "eval_PERSON_recall": 0.86, + "eval_QUANTITY_f1": 0.5194805194805195, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.45454545454545453, + "eval_QUANTITY_recall": 0.6060606060606061, + "eval_TIME_f1": 0.8846153846153847, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.9583333333333334, + "eval_TIME_recall": 0.8214285714285714, + "eval_loss": 0.21532496809959412, + "eval_overall_accuracy": 0.9350145489815713, + "eval_overall_f1": 0.6649214659685864, + "eval_overall_precision": 0.6091127098321343, + "eval_overall_recall": 0.7319884726224783, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.403, + "eval_steps_per_second": 9.167, + "step": 954 + }, + { + "epoch": 10.0, + "grad_norm": 1.085120677947998, + "learning_rate": 4.5e-05, + "loss": 0.2377, + "step": 1060 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.5903614457831325, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.5157894736842106, + "eval_LOCATION_recall": 0.6901408450704225, + "eval_ORGANIZATION_f1": 0.5138888888888888, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.46835443037974683, + "eval_ORGANIZATION_recall": 0.5692307692307692, + "eval_PERSON_f1": 0.7964071856287426, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7228260869565217, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.48780487804878053, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.40816326530612246, + "eval_QUANTITY_recall": 0.6060606060606061, + "eval_TIME_f1": 0.8727272727272727, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8888888888888888, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.20414884388446808, + "eval_overall_accuracy": 0.9371968962172648, + "eval_overall_f1": 0.6734955185659411, + "eval_overall_precision": 0.6059907834101382, + "eval_overall_recall": 0.7579250720461095, + "eval_runtime": 0.3342, + "eval_samples_per_second": 559.551, + "eval_steps_per_second": 8.977, + "step": 1060 + }, + { + "epoch": 11.0, + "grad_norm": 1.6044801473617554, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.2155, + "step": 1166 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.6380368098159509, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.5652173913043478, + "eval_LOCATION_recall": 0.7323943661971831, + "eval_ORGANIZATION_f1": 0.5241379310344827, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.475, + "eval_ORGANIZATION_recall": 0.5846153846153846, + "eval_PERSON_f1": 0.8123076923076923, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7542857142857143, + "eval_PERSON_recall": 0.88, + "eval_QUANTITY_f1": 0.4938271604938272, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.4166666666666667, + "eval_QUANTITY_recall": 0.6060606060606061, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8571428571428571, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.18787693977355957, + "eval_overall_accuracy": 0.9396217264791464, + "eval_overall_f1": 0.690909090909091, + "eval_overall_precision": 0.6288416075650118, + "eval_overall_recall": 0.7665706051873199, + "eval_runtime": 0.3322, + "eval_samples_per_second": 562.865, + "eval_steps_per_second": 9.03, + "step": 1166 + }, + { + "epoch": 12.0, + "grad_norm": 1.741855263710022, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.201, + "step": 1272 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.6585365853658537, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.5806451612903226, + "eval_LOCATION_recall": 0.7605633802816901, + "eval_ORGANIZATION_f1": 0.5466666666666666, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.4823529411764706, + "eval_ORGANIZATION_recall": 0.6307692307692307, + "eval_PERSON_f1": 0.8159509202453988, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7556818181818182, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.4819277108433735, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.4, + "eval_QUANTITY_recall": 0.6060606060606061, + "eval_TIME_f1": 0.8571428571428571, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8571428571428571, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.18443620204925537, + "eval_overall_accuracy": 0.9413191076624636, + "eval_overall_f1": 0.6983311938382541, + "eval_overall_precision": 0.6296296296296297, + "eval_overall_recall": 0.7838616714697406, + "eval_runtime": 0.3338, + "eval_samples_per_second": 560.243, + "eval_steps_per_second": 8.988, + "step": 1272 + }, + { + "epoch": 13.0, + "grad_norm": 0.8449801206588745, + "learning_rate": 4.35e-05, + "loss": 0.1888, + "step": 1378 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.6835443037974683, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6206896551724138, + "eval_LOCATION_recall": 0.7605633802816901, + "eval_ORGANIZATION_f1": 0.5616438356164384, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5061728395061729, + "eval_ORGANIZATION_recall": 0.6307692307692307, + "eval_PERSON_f1": 0.8275862068965518, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7810650887573964, + "eval_PERSON_recall": 0.88, + "eval_QUANTITY_f1": 0.5128205128205128, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.4444444444444444, + "eval_QUANTITY_recall": 0.6060606060606061, + "eval_TIME_f1": 0.8727272727272727, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8888888888888888, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.16671638190746307, + "eval_overall_accuracy": 0.9439864209505334, + "eval_overall_f1": 0.716931216931217, + "eval_overall_precision": 0.6625916870415648, + "eval_overall_recall": 0.7809798270893372, + "eval_runtime": 0.3283, + "eval_samples_per_second": 569.545, + "eval_steps_per_second": 9.137, + "step": 1378 + }, + { + "epoch": 14.0, + "grad_norm": 2.652808904647827, + "learning_rate": 4.3e-05, + "loss": 0.1831, + "step": 1484 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.7236842105263157, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6790123456790124, + "eval_LOCATION_recall": 0.7746478873239436, + "eval_ORGANIZATION_f1": 0.5675675675675675, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5060240963855421, + "eval_ORGANIZATION_recall": 0.6461538461538462, + "eval_PERSON_f1": 0.8213166144200627, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7751479289940828, + "eval_PERSON_recall": 0.8733333333333333, + "eval_QUANTITY_f1": 0.5641025641025641, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.4888888888888889, + "eval_QUANTITY_recall": 0.6666666666666666, + "eval_TIME_f1": 0.8727272727272727, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8888888888888888, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.16177679598331451, + "eval_overall_accuracy": 0.9468962172647915, + "eval_overall_f1": 0.7287234042553191, + "eval_overall_precision": 0.6765432098765433, + "eval_overall_recall": 0.7896253602305475, + "eval_runtime": 0.3274, + "eval_samples_per_second": 571.237, + "eval_steps_per_second": 9.164, + "step": 1484 + }, + { + "epoch": 15.0, + "grad_norm": 1.1179120540618896, + "learning_rate": 4.25e-05, + "loss": 0.1719, + "step": 1590 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.7215189873417721, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6551724137931034, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.6225165562913907, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5465116279069767, + "eval_ORGANIZATION_recall": 0.7230769230769231, + "eval_PERSON_f1": 0.8209876543209876, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.764367816091954, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.6582278481012658, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.5652173913043478, + "eval_QUANTITY_recall": 0.7878787878787878, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.16455210745334625, + "eval_overall_accuracy": 0.9485935984481086, + "eval_overall_f1": 0.748051948051948, + "eval_overall_precision": 0.6808510638297872, + "eval_overall_recall": 0.829971181556196, + "eval_runtime": 0.3291, + "eval_samples_per_second": 568.155, + "eval_steps_per_second": 9.115, + "step": 1590 + }, + { + "epoch": 16.0, + "grad_norm": 1.5096036195755005, + "learning_rate": 4.2e-05, + "loss": 0.1626, + "step": 1696 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.7307692307692307, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6705882352941176, + "eval_LOCATION_recall": 0.8028169014084507, + "eval_ORGANIZATION_f1": 0.6308724832214766, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5595238095238095, + "eval_ORGANIZATION_recall": 0.7230769230769231, + "eval_PERSON_f1": 0.8238993710691823, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7797619047619048, + "eval_PERSON_recall": 0.8733333333333333, + "eval_QUANTITY_f1": 0.65, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.5531914893617021, + "eval_QUANTITY_recall": 0.7878787878787878, + "eval_TIME_f1": 0.8524590163934426, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7878787878787878, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.16098518669605255, + "eval_overall_accuracy": 0.9481086323957323, + "eval_overall_f1": 0.7513089005235603, + "eval_overall_precision": 0.6882494004796164, + "eval_overall_recall": 0.8270893371757925, + "eval_runtime": 0.3332, + "eval_samples_per_second": 561.268, + "eval_steps_per_second": 9.004, + "step": 1696 + }, + { + "epoch": 17.0, + "grad_norm": 0.7566058039665222, + "learning_rate": 4.15e-05, + "loss": 0.1614, + "step": 1802 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.7468354430379747, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6781609195402298, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.6357615894039734, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5581395348837209, + "eval_ORGANIZATION_recall": 0.7384615384615385, + "eval_PERSON_f1": 0.8348909657320873, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.783625730994152, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.675, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.574468085106383, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8387096774193549, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7647058823529411, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.15790420770645142, + "eval_overall_accuracy": 0.9498060135790495, + "eval_overall_f1": 0.7616580310880829, + "eval_overall_precision": 0.691764705882353, + "eval_overall_recall": 0.8472622478386167, + "eval_runtime": 0.3333, + "eval_samples_per_second": 560.997, + "eval_steps_per_second": 9.0, + "step": 1802 + }, + { + "epoch": 18.0, + "grad_norm": 1.216138482093811, + "learning_rate": 4.1e-05, + "loss": 0.1558, + "step": 1908 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.7547169811320754, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6818181818181818, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6133333333333333, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5411764705882353, + "eval_ORGANIZATION_recall": 0.7076923076923077, + "eval_PERSON_f1": 0.8374999999999999, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.788235294117647, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.6753246753246752, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.5909090909090909, + "eval_QUANTITY_recall": 0.7878787878787878, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.15334509313106537, + "eval_overall_accuracy": 0.9502909796314258, + "eval_overall_f1": 0.7624020887728461, + "eval_overall_precision": 0.6968973747016707, + "eval_overall_recall": 0.8414985590778098, + "eval_runtime": 0.3283, + "eval_samples_per_second": 569.64, + "eval_steps_per_second": 9.139, + "step": 1908 + }, + { + "epoch": 19.0, + "grad_norm": 0.9647671580314636, + "learning_rate": 4.05e-05, + "loss": 0.1534, + "step": 2014 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.759493670886076, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6896551724137931, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6363636363636364, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.550561797752809, + "eval_ORGANIZATION_recall": 0.7538461538461538, + "eval_PERSON_f1": 0.8364779874213838, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7916666666666666, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.6842105263157895, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6046511627906976, + "eval_QUANTITY_recall": 0.7878787878787878, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.1524961143732071, + "eval_overall_accuracy": 0.9512609117361784, + "eval_overall_f1": 0.7676240208877285, + "eval_overall_precision": 0.7016706443914081, + "eval_overall_recall": 0.8472622478386167, + "eval_runtime": 0.3358, + "eval_samples_per_second": 556.83, + "eval_steps_per_second": 8.933, + "step": 2014 + }, + { + "epoch": 20.0, + "grad_norm": 1.3781942129135132, + "learning_rate": 4e-05, + "loss": 0.1478, + "step": 2120 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.7701863354037267, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6888888888888889, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.6293706293706293, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5769230769230769, + "eval_ORGANIZATION_recall": 0.6923076923076923, + "eval_PERSON_f1": 0.8343949044585988, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7987804878048781, + "eval_PERSON_recall": 0.8733333333333333, + "eval_QUANTITY_f1": 0.7500000000000001, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.14749658107757568, + "eval_overall_accuracy": 0.9532007759456838, + "eval_overall_f1": 0.7760000000000001, + "eval_overall_precision": 0.7220843672456576, + "eval_overall_recall": 0.8386167146974063, + "eval_runtime": 0.3278, + "eval_samples_per_second": 570.507, + "eval_steps_per_second": 9.153, + "step": 2120 + }, + { + "epoch": 21.0, + "grad_norm": 1.2294446229934692, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.1453, + "step": 2226 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.751592356687898, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.686046511627907, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.624113475177305, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5789473684210527, + "eval_ORGANIZATION_recall": 0.676923076923077, + "eval_PERSON_f1": 0.8397435897435898, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.808641975308642, + "eval_PERSON_recall": 0.8733333333333333, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.14134016633033752, + "eval_overall_accuracy": 0.9544131910766246, + "eval_overall_f1": 0.7762803234501348, + "eval_overall_precision": 0.7291139240506329, + "eval_overall_recall": 0.829971181556196, + "eval_runtime": 0.3335, + "eval_samples_per_second": 560.718, + "eval_steps_per_second": 8.995, + "step": 2226 + }, + { + "epoch": 22.0, + "grad_norm": 1.5209108591079712, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.1433, + "step": 2332 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.7730061349693252, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6847826086956522, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.6257668711656441, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5204081632653061, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.83125, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.7823529411764706, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.7631578947368421, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6744186046511628, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.819672131147541, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.7575757575757576, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.15656441450119019, + "eval_overall_accuracy": 0.949078564500485, + "eval_overall_f1": 0.7688378033205621, + "eval_overall_precision": 0.6903669724770642, + "eval_overall_recall": 0.8674351585014409, + "eval_runtime": 0.3276, + "eval_samples_per_second": 570.876, + "eval_steps_per_second": 9.158, + "step": 2332 + }, + { + "epoch": 23.0, + "grad_norm": 1.3234655857086182, + "learning_rate": 3.85e-05, + "loss": 0.1378, + "step": 2438 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6415094339622641, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5425531914893617, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8471337579617835, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8109756097560976, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.7887323943661972, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7368421052631579, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8813559322033899, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8387096774193549, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.14258012175559998, + "eval_overall_accuracy": 0.9553831231813773, + "eval_overall_f1": 0.7842105263157895, + "eval_overall_precision": 0.7215496368038741, + "eval_overall_recall": 0.8587896253602305, + "eval_runtime": 0.3337, + "eval_samples_per_second": 560.382, + "eval_steps_per_second": 8.99, + "step": 2438 + }, + { + "epoch": 24.0, + "grad_norm": 1.6313447952270508, + "learning_rate": 3.8e-05, + "loss": 0.1366, + "step": 2544 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.7672955974842768, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6931818181818182, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6486486486486487, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5783132530120482, + "eval_ORGANIZATION_recall": 0.7384615384615385, + "eval_PERSON_f1": 0.8417721518987342, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8012048192771084, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8666666666666666, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8125, + "eval_TIME_recall": 0.9285714285714286, + "eval_loss": 0.14373408257961273, + "eval_overall_accuracy": 0.9532007759456838, + "eval_overall_f1": 0.7830687830687829, + "eval_overall_precision": 0.7237163814180929, + "eval_overall_recall": 0.8530259365994236, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.255, + "eval_steps_per_second": 9.165, + "step": 2544 + }, + { + "epoch": 25.0, + "grad_norm": 1.8420681953430176, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.1332, + "step": 2650 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.7692307692307694, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7058823529411765, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6490066225165563, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5697674418604651, + "eval_ORGANIZATION_recall": 0.7538461538461538, + "eval_PERSON_f1": 0.8407643312101911, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8048780487804879, + "eval_PERSON_recall": 0.88, + "eval_QUANTITY_f1": 0.736842105263158, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6511627906976745, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8333333333333334, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.78125, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.14212451875209808, + "eval_overall_accuracy": 0.9536857419980601, + "eval_overall_f1": 0.7767503302509907, + "eval_overall_precision": 0.7170731707317073, + "eval_overall_recall": 0.8472622478386167, + "eval_runtime": 0.3286, + "eval_samples_per_second": 569.109, + "eval_steps_per_second": 9.13, + "step": 2650 + }, + { + "epoch": 26.0, + "grad_norm": 0.7889411449432373, + "learning_rate": 3.7e-05, + "loss": 0.135, + "step": 2756 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.7770700636942676, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7093023255813954, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6578947368421053, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5747126436781609, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8498402555910542, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8159509202453987, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8474576271186439, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1382077932357788, + "eval_overall_accuracy": 0.9568380213385063, + "eval_overall_f1": 0.7888446215139442, + "eval_overall_precision": 0.7315270935960592, + "eval_overall_recall": 0.8559077809798271, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.423, + "eval_steps_per_second": 9.167, + "step": 2756 + }, + { + "epoch": 27.0, + "grad_norm": 0.8865845203399658, + "learning_rate": 3.65e-05, + "loss": 0.1306, + "step": 2862 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6442953020134228, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5714285714285714, + "eval_ORGANIZATION_recall": 0.7384615384615385, + "eval_PERSON_f1": 0.8498402555910542, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8159509202453987, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.7397260273972603, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.675, + "eval_QUANTITY_recall": 0.8181818181818182, + "eval_TIME_f1": 0.8474576271186439, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8064516129032258, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13908694684505463, + "eval_overall_accuracy": 0.954898157129001, + "eval_overall_f1": 0.7802929427430094, + "eval_overall_precision": 0.7252475247524752, + "eval_overall_recall": 0.8443804034582133, + "eval_runtime": 0.3276, + "eval_samples_per_second": 570.863, + "eval_steps_per_second": 9.158, + "step": 2862 + }, + { + "epoch": 28.0, + "grad_norm": 1.6113708019256592, + "learning_rate": 3.6e-05, + "loss": 0.1279, + "step": 2968 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.7612903225806451, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7023809523809523, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.6709677419354838, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5777777777777777, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8461538461538461, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8148148148148148, + "eval_PERSON_recall": 0.88, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13870620727539062, + "eval_overall_accuracy": 0.9551406401551892, + "eval_overall_f1": 0.7872340425531915, + "eval_overall_precision": 0.7308641975308642, + "eval_overall_recall": 0.8530259365994236, + "eval_runtime": 0.334, + "eval_samples_per_second": 559.915, + "eval_steps_per_second": 8.983, + "step": 2968 + }, + { + "epoch": 29.0, + "grad_norm": 0.4429430663585663, + "learning_rate": 3.55e-05, + "loss": 0.1257, + "step": 3074 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.7564102564102564, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6941176470588235, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.6835443037974683, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5806451612903226, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8461538461538461, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8148148148148148, + "eval_PERSON_recall": 0.88, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13918448984622955, + "eval_overall_accuracy": 0.9539282250242483, + "eval_overall_f1": 0.7894039735099336, + "eval_overall_precision": 0.7303921568627451, + "eval_overall_recall": 0.8587896253602305, + "eval_runtime": 0.3274, + "eval_samples_per_second": 571.152, + "eval_steps_per_second": 9.163, + "step": 3074 + }, + { + "epoch": 30.0, + "grad_norm": 0.9709805250167847, + "learning_rate": 3.5e-05, + "loss": 0.1248, + "step": 3180 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.7692307692307694, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7058823529411765, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6845637583892618, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6071428571428571, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8662420382165605, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8620689655172413, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8333333333333334, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13585644960403442, + "eval_overall_accuracy": 0.9580504364694471, + "eval_overall_f1": 0.801068090787717, + "eval_overall_precision": 0.746268656716418, + "eval_overall_recall": 0.8645533141210374, + "eval_runtime": 0.3333, + "eval_samples_per_second": 561.017, + "eval_steps_per_second": 9.0, + "step": 3180 + }, + { + "epoch": 31.0, + "grad_norm": 0.5455430746078491, + "learning_rate": 3.45e-05, + "loss": 0.1244, + "step": 3286 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.7870967741935485, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7261904761904762, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6799999999999999, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8662420382165605, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7887323943661972, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7368421052631579, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1317688375711441, + "eval_overall_accuracy": 0.9590203685741998, + "eval_overall_f1": 0.8058902275769746, + "eval_overall_precision": 0.7525, + "eval_overall_recall": 0.8674351585014409, + "eval_runtime": 0.3275, + "eval_samples_per_second": 571.008, + "eval_steps_per_second": 9.161, + "step": 3286 + }, + { + "epoch": 32.0, + "grad_norm": 0.8515002131462097, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.1183, + "step": 3392 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.7721518987341772, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7011494252873564, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6538461538461537, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5604395604395604, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8535031847133757, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8170731707317073, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13729499280452728, + "eval_overall_accuracy": 0.9558680892337537, + "eval_overall_f1": 0.7926023778071334, + "eval_overall_precision": 0.7317073170731707, + "eval_overall_recall": 0.8645533141210374, + "eval_runtime": 0.3283, + "eval_samples_per_second": 569.519, + "eval_steps_per_second": 9.137, + "step": 3392 + }, + { + "epoch": 33.0, + "grad_norm": 1.5603715181350708, + "learning_rate": 3.35e-05, + "loss": 0.1197, + "step": 3498 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.7721518987341772, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7011494252873564, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6622516556291391, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5813953488372093, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8525641025641025, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8209876543209876, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13469155132770538, + "eval_overall_accuracy": 0.9568380213385063, + "eval_overall_f1": 0.7919999999999999, + "eval_overall_precision": 0.7369727047146402, + "eval_overall_recall": 0.8559077809798271, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.973, + "eval_steps_per_second": 9.016, + "step": 3498 + }, + { + "epoch": 34.0, + "grad_norm": 1.131216287612915, + "learning_rate": 3.3e-05, + "loss": 0.1176, + "step": 3604 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.7770700636942676, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7093023255813954, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6711409395973155, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5952380952380952, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8626198083067093, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8282208588957055, + "eval_PERSON_recall": 0.9, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13136626780033112, + "eval_overall_accuracy": 0.95635305528613, + "eval_overall_f1": 0.7994652406417113, + "eval_overall_precision": 0.7456359102244389, + "eval_overall_recall": 0.861671469740634, + "eval_runtime": 0.3338, + "eval_samples_per_second": 560.142, + "eval_steps_per_second": 8.986, + "step": 3604 + }, + { + "epoch": 35.0, + "grad_norm": 0.8107590675354004, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.117, + "step": 3710 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.7388535031847132, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6744186046511628, + "eval_LOCATION_recall": 0.8169014084507042, + "eval_ORGANIZATION_f1": 0.6493506493506495, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5617977528089888, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8507936507936508, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8121212121212121, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13819974660873413, + "eval_overall_accuracy": 0.954898157129001, + "eval_overall_f1": 0.7830687830687829, + "eval_overall_precision": 0.7237163814180929, + "eval_overall_recall": 0.8530259365994236, + "eval_runtime": 0.328, + "eval_samples_per_second": 570.151, + "eval_steps_per_second": 9.147, + "step": 3710 + }, + { + "epoch": 36.0, + "grad_norm": 0.9941292405128479, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.1139, + "step": 3816 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6792452830188681, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.574468085106383, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8471337579617835, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8109756097560976, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13795477151870728, + "eval_overall_accuracy": 0.9546556741028128, + "eval_overall_f1": 0.7921052631578948, + "eval_overall_precision": 0.7288135593220338, + "eval_overall_recall": 0.8674351585014409, + "eval_runtime": 0.3278, + "eval_samples_per_second": 570.51, + "eval_steps_per_second": 9.153, + "step": 3816 + }, + { + "epoch": 37.0, + "grad_norm": 1.089221477508545, + "learning_rate": 3.15e-05, + "loss": 0.1112, + "step": 3922 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.7547169811320754, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6818181818181818, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6923076923076923, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5934065934065934, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8471337579617835, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8109756097560976, + "eval_PERSON_recall": 0.8866666666666667, + "eval_QUANTITY_f1": 0.7466666666666666, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8275862068965518, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1449616551399231, + "eval_overall_accuracy": 0.9527158098933075, + "eval_overall_f1": 0.7847769028871391, + "eval_overall_precision": 0.7204819277108434, + "eval_overall_recall": 0.861671469740634, + "eval_runtime": 0.3265, + "eval_samples_per_second": 572.826, + "eval_steps_per_second": 9.19, + "step": 3922 + }, + { + "epoch": 38.0, + "grad_norm": 0.7692789435386658, + "learning_rate": 3.1e-05, + "loss": 0.1125, + "step": 4028 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.751592356687898, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.686046511627907, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.6710526315789473, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5862068965517241, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8562300319488818, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8220858895705522, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.7671232876712328, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13578325510025024, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.7872340425531915, + "eval_overall_precision": 0.7308641975308642, + "eval_overall_recall": 0.8530259365994236, + "eval_runtime": 0.3341, + "eval_samples_per_second": 559.764, + "eval_steps_per_second": 8.98, + "step": 4028 + }, + { + "epoch": 39.0, + "grad_norm": 2.1592094898223877, + "learning_rate": 3.05e-05, + "loss": 0.1098, + "step": 4134 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6582278481012658, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5591397849462365, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8535031847133757, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8170731707317073, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.13348892331123352, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.7889182058047493, + "eval_overall_precision": 0.7274939172749392, + "eval_overall_recall": 0.861671469740634, + "eval_runtime": 0.3327, + "eval_samples_per_second": 562.121, + "eval_steps_per_second": 9.018, + "step": 4134 + }, + { + "epoch": 40.0, + "grad_norm": 0.7562073469161987, + "learning_rate": 3e-05, + "loss": 0.1104, + "step": 4240 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.782051282051282, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7176470588235294, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6896551724137931, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8535031847133757, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8170731707317073, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13004843890666962, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8010752688172043, + "eval_overall_precision": 0.7506297229219143, + "eval_overall_recall": 0.8587896253602305, + "eval_runtime": 0.3275, + "eval_samples_per_second": 570.926, + "eval_steps_per_second": 9.159, + "step": 4240 + }, + { + "epoch": 41.0, + "grad_norm": 0.6000316143035889, + "learning_rate": 2.95e-05, + "loss": 0.1072, + "step": 4346 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.751592356687898, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.686046511627907, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.6709677419354838, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5777777777777777, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8562300319488818, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8220858895705522, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13558167219161987, + "eval_overall_accuracy": 0.9561105722599418, + "eval_overall_f1": 0.7904509283819627, + "eval_overall_precision": 0.7321867321867321, + "eval_overall_recall": 0.8587896253602305, + "eval_runtime": 0.3335, + "eval_samples_per_second": 560.68, + "eval_steps_per_second": 8.995, + "step": 4346 + }, + { + "epoch": 42.0, + "grad_norm": 0.9464073181152344, + "learning_rate": 2.9e-05, + "loss": 0.1083, + "step": 4452 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.751592356687898, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.686046511627907, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.6666666666666667, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5795454545454546, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8535031847133757, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8170731707317073, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13643476366996765, + "eval_overall_accuracy": 0.9551406401551892, + "eval_overall_f1": 0.7861885790172644, + "eval_overall_precision": 0.729064039408867, + "eval_overall_recall": 0.8530259365994236, + "eval_runtime": 0.3337, + "eval_samples_per_second": 560.328, + "eval_steps_per_second": 8.989, + "step": 4452 + }, + { + "epoch": 43.0, + "grad_norm": 0.4999348223209381, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.1074, + "step": 4558 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.7770700636942676, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7093023255813954, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6805555555555556, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.620253164556962, + "eval_ORGANIZATION_recall": 0.7538461538461538, + "eval_PERSON_f1": 0.8434504792332268, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8098159509202454, + "eval_PERSON_recall": 0.88, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1306508332490921, + "eval_overall_accuracy": 0.9575654704170709, + "eval_overall_f1": 0.7967698519515478, + "eval_overall_precision": 0.7474747474747475, + "eval_overall_recall": 0.8530259365994236, + "eval_runtime": 0.3335, + "eval_samples_per_second": 560.694, + "eval_steps_per_second": 8.995, + "step": 4558 + }, + { + "epoch": 44.0, + "grad_norm": 0.9807902574539185, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.1062, + "step": 4664 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.7922077922077921, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7349397590361446, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6938775510204082, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6219512195121951, + "eval_ORGANIZATION_recall": 0.7846153846153846, + "eval_PERSON_f1": 0.8571428571428572, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.9, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.12918023765087128, + "eval_overall_accuracy": 0.9587778855480117, + "eval_overall_f1": 0.8053691275167785, + "eval_overall_precision": 0.7537688442211056, + "eval_overall_recall": 0.8645533141210374, + "eval_runtime": 0.3318, + "eval_samples_per_second": 563.615, + "eval_steps_per_second": 9.042, + "step": 4664 + }, + { + "epoch": 45.0, + "grad_norm": 6.018434524536133, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.1055, + "step": 4770 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.782051282051282, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7176470588235294, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6944444444444444, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6329113924050633, + "eval_ORGANIZATION_recall": 0.7692307692307693, + "eval_PERSON_f1": 0.8571428571428572, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.9, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13075633347034454, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8037634408602151, + "eval_overall_precision": 0.7531486146095718, + "eval_overall_recall": 0.861671469740634, + "eval_runtime": 0.327, + "eval_samples_per_second": 571.884, + "eval_steps_per_second": 9.175, + "step": 4770 + }, + { + "epoch": 46.0, + "grad_norm": 0.8684478998184204, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.104, + "step": 4876 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.7922077922077921, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7349397590361446, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6842105263157896, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5977011494252874, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8562300319488818, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8220858895705522, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.7887323943661972, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7368421052631579, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8771929824561403, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8620689655172413, + "eval_TIME_recall": 0.8928571428571429, + "eval_loss": 0.1286059468984604, + "eval_overall_accuracy": 0.9587778855480117, + "eval_overall_f1": 0.8032128514056224, + "eval_overall_precision": 0.75, + "eval_overall_recall": 0.8645533141210374, + "eval_runtime": 0.3349, + "eval_samples_per_second": 558.33, + "eval_steps_per_second": 8.957, + "step": 4876 + }, + { + "epoch": 47.0, + "grad_norm": 1.5696133375167847, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.1024, + "step": 4982 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.782051282051282, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7176470588235294, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6967741935483871, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8571428571428572, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.9, + "eval_QUANTITY_f1": 0.7777777777777778, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.717948717948718, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13379882276058197, + "eval_overall_accuracy": 0.9570805043646945, + "eval_overall_f1": 0.7999999999999999, + "eval_overall_precision": 0.7401960784313726, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.3318, + "eval_samples_per_second": 563.559, + "eval_steps_per_second": 9.041, + "step": 4982 + }, + { + "epoch": 48.0, + "grad_norm": 5.395870685577393, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.1024, + "step": 5088 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.7870967741935485, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7261904761904762, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6967741935483871, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8571428571428572, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.9, + "eval_QUANTITY_f1": 0.8169014084507042, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7631578947368421, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13115271925926208, + "eval_overall_accuracy": 0.9582929194956353, + "eval_overall_f1": 0.8047808764940241, + "eval_overall_precision": 0.7463054187192119, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.3334, + "eval_samples_per_second": 560.824, + "eval_steps_per_second": 8.997, + "step": 5088 + }, + { + "epoch": 49.0, + "grad_norm": 0.6169828772544861, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.101, + "step": 5194 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.7741935483870968, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7142857142857143, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7027027027027027, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6265060240963856, + "eval_ORGANIZATION_recall": 0.8, + "eval_PERSON_f1": 0.8571428571428572, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.9, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13072074949741364, + "eval_overall_accuracy": 0.957807953443259, + "eval_overall_f1": 0.8021390374331551, + "eval_overall_precision": 0.7481296758104738, + "eval_overall_recall": 0.8645533141210374, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.838, + "eval_steps_per_second": 9.013, + "step": 5194 + }, + { + "epoch": 50.0, + "grad_norm": 1.9421120882034302, + "learning_rate": 2.5e-05, + "loss": 0.0983, + "step": 5300 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.759493670886076, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6896551724137931, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.6962025316455697, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5913978494623656, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8607594936708859, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8192771084337349, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13512645661830902, + "eval_overall_accuracy": 0.9565955383123181, + "eval_overall_f1": 0.7989487516425756, + "eval_overall_precision": 0.7342995169082126, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.333, + "eval_samples_per_second": 561.616, + "eval_steps_per_second": 9.01, + "step": 5300 + }, + { + "epoch": 51.0, + "grad_norm": 0.9954243898391724, + "learning_rate": 2.45e-05, + "loss": 0.0992, + "step": 5406 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.7770700636942676, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7093023255813954, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7189542483660131, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8607594936708859, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8192771084337349, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13235999643802643, + "eval_overall_accuracy": 0.9587778855480117, + "eval_overall_f1": 0.8079470198675496, + "eval_overall_precision": 0.7475490196078431, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.3277, + "eval_samples_per_second": 570.726, + "eval_steps_per_second": 9.156, + "step": 5406 + }, + { + "epoch": 52.0, + "grad_norm": 1.0411736965179443, + "learning_rate": 2.4e-05, + "loss": 0.0995, + "step": 5512 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.7870967741935485, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7261904761904762, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7114093959731543, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6309523809523809, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.858974358974359, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8271604938271605, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.7567567567567567, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6829268292682927, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13270841538906097, + "eval_overall_accuracy": 0.9575654704170709, + "eval_overall_f1": 0.8032128514056224, + "eval_overall_precision": 0.75, + "eval_overall_recall": 0.8645533141210374, + "eval_runtime": 0.3288, + "eval_samples_per_second": 568.722, + "eval_steps_per_second": 9.124, + "step": 5512 + }, + { + "epoch": 53.0, + "grad_norm": 1.1571258306503296, + "learning_rate": 2.35e-05, + "loss": 0.097, + "step": 5618 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.7672955974842768, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6931818181818182, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6962025316455697, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5913978494623656, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7466666666666666, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.14093737304210663, + "eval_overall_accuracy": 0.9546556741028128, + "eval_overall_f1": 0.7958115183246073, + "eval_overall_precision": 0.7290167865707434, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.3268, + "eval_samples_per_second": 572.196, + "eval_steps_per_second": 9.18, + "step": 5618 + }, + { + "epoch": 54.0, + "grad_norm": 1.3807525634765625, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0958, + "step": 5724 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.7692307692307694, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7058823529411765, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7006369426751592, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5978260869565217, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8535031847133757, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8170731707317073, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13698944449424744, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.7978863936591809, + "eval_overall_precision": 0.7365853658536585, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.3342, + "eval_samples_per_second": 559.593, + "eval_steps_per_second": 8.977, + "step": 5724 + }, + { + "epoch": 55.0, + "grad_norm": 0.8431147933006287, + "learning_rate": 2.25e-05, + "loss": 0.097, + "step": 5830 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.8, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7380952380952381, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.6923076923076923, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.5934065934065934, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.858974358974359, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8271604938271605, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.8169014084507042, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7631578947368421, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13174039125442505, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8069241011984021, + "eval_overall_precision": 0.75, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.3339, + "eval_samples_per_second": 560.057, + "eval_steps_per_second": 8.985, + "step": 5830 + }, + { + "epoch": 56.0, + "grad_norm": 1.8206969499588013, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0966, + "step": 5936 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7096774193548387, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6111111111111112, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8571428571428572, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.9, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13501103222370148, + "eval_overall_accuracy": 0.9570805043646945, + "eval_overall_f1": 0.8005284015852047, + "eval_overall_precision": 0.7390243902439024, + "eval_overall_recall": 0.8731988472622478, + "eval_runtime": 0.3332, + "eval_samples_per_second": 561.301, + "eval_steps_per_second": 9.005, + "step": 5936 + }, + { + "epoch": 57.0, + "grad_norm": 1.323081135749817, + "learning_rate": 2.15e-05, + "loss": 0.0937, + "step": 6042 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.7741935483870968, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7142857142857143, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7058823529411765, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6136363636363636, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8571428571428572, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8181818181818182, + "eval_PERSON_recall": 0.9, + "eval_QUANTITY_f1": 0.8169014084507042, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7631578947368421, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1313411146402359, + "eval_overall_accuracy": 0.9582929194956353, + "eval_overall_f1": 0.8042609853528628, + "eval_overall_precision": 0.7475247524752475, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.3271, + "eval_samples_per_second": 571.695, + "eval_steps_per_second": 9.172, + "step": 6042 + }, + { + "epoch": 58.0, + "grad_norm": 0.6552602648735046, + "learning_rate": 2.1e-05, + "loss": 0.0959, + "step": 6148 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.759493670886076, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6896551724137931, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7012987012987013, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6067415730337079, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.858974358974359, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8271604938271605, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.8169014084507042, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7631578947368421, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13326187431812286, + "eval_overall_accuracy": 0.9573229873908826, + "eval_overall_f1": 0.800531914893617, + "eval_overall_precision": 0.7432098765432099, + "eval_overall_recall": 0.8674351585014409, + "eval_runtime": 0.3272, + "eval_samples_per_second": 571.509, + "eval_steps_per_second": 9.169, + "step": 6148 + }, + { + "epoch": 59.0, + "grad_norm": 3.051168441772461, + "learning_rate": 2.05e-05, + "loss": 0.092, + "step": 6254 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.751592356687898, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.686046511627907, + "eval_LOCATION_recall": 0.8309859154929577, + "eval_ORGANIZATION_f1": 0.7096774193548387, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6111111111111112, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8626198083067093, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8282208588957055, + "eval_PERSON_recall": 0.9, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13443896174430847, + "eval_overall_accuracy": 0.9565955383123181, + "eval_overall_f1": 0.8010610079575596, + "eval_overall_precision": 0.742014742014742, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.3257, + "eval_samples_per_second": 574.137, + "eval_steps_per_second": 9.211, + "step": 6254 + }, + { + "epoch": 60.0, + "grad_norm": 1.0962821245193481, + "learning_rate": 2e-05, + "loss": 0.0932, + "step": 6360 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.7922077922077921, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7349397590361446, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.6973684210526315, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6091954022988506, + "eval_ORGANIZATION_recall": 0.8153846153846154, + "eval_PERSON_f1": 0.8617363344051446, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8322981366459627, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13036972284317017, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8069705093833781, + "eval_overall_precision": 0.7543859649122807, + "eval_overall_recall": 0.8674351585014409, + "eval_runtime": 0.3264, + "eval_samples_per_second": 572.876, + "eval_steps_per_second": 9.191, + "step": 6360 + }, + { + "epoch": 61.0, + "grad_norm": 0.8030592799186707, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.0906, + "step": 6466 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.7741935483870968, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7142857142857143, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7058823529411765, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6136363636363636, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7887323943661972, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7368421052631579, + "eval_QUANTITY_recall": 0.8484848484848485, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1298861801624298, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8042609853528628, + "eval_overall_precision": 0.7475247524752475, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.359, + "eval_steps_per_second": 9.166, + "step": 6466 + }, + { + "epoch": 62.0, + "grad_norm": 1.4909107685089111, + "learning_rate": 1.9e-05, + "loss": 0.0906, + "step": 6572 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7272727272727273, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6292134831460674, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8662420382165605, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1340462863445282, + "eval_overall_accuracy": 0.9573229873908826, + "eval_overall_f1": 0.8079470198675496, + "eval_overall_precision": 0.7475490196078431, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.3269, + "eval_samples_per_second": 571.958, + "eval_steps_per_second": 9.176, + "step": 6572 + }, + { + "epoch": 63.0, + "grad_norm": 3.782954216003418, + "learning_rate": 1.85e-05, + "loss": 0.0919, + "step": 6678 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.7741935483870968, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7142857142857143, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7189542483660131, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.858974358974359, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8271604938271605, + "eval_PERSON_recall": 0.8933333333333333, + "eval_QUANTITY_f1": 0.8169014084507042, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7631578947368421, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13134948909282684, + "eval_overall_accuracy": 0.9587778855480117, + "eval_overall_f1": 0.8074866310160428, + "eval_overall_precision": 0.7531172069825436, + "eval_overall_recall": 0.8703170028818443, + "eval_runtime": 0.3279, + "eval_samples_per_second": 570.382, + "eval_steps_per_second": 9.151, + "step": 6678 + }, + { + "epoch": 64.0, + "grad_norm": 0.4201819598674774, + "learning_rate": 1.8e-05, + "loss": 0.0904, + "step": 6784 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.7922077922077921, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7349397590361446, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7199999999999999, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6352941176470588, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13214616477489471, + "eval_overall_accuracy": 0.9595053346265762, + "eval_overall_f1": 0.8144192256341789, + "eval_overall_precision": 0.7587064676616916, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.3282, + "eval_samples_per_second": 569.776, + "eval_steps_per_second": 9.141, + "step": 6784 + }, + { + "epoch": 65.0, + "grad_norm": 1.1749016046524048, + "learning_rate": 1.75e-05, + "loss": 0.0927, + "step": 6890 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.7643312101910827, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6976744186046512, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7096774193548387, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6111111111111112, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1333334743976593, + "eval_overall_accuracy": 0.9570805043646945, + "eval_overall_f1": 0.8031704095112285, + "eval_overall_precision": 0.7414634146341463, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.279, + "eval_steps_per_second": 9.165, + "step": 6890 + }, + { + "epoch": 66.0, + "grad_norm": 0.7773393988609314, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0884, + "step": 6996 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.7672955974842768, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6931818181818182, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7272727272727273, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6292134831460674, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13700337707996368, + "eval_overall_accuracy": 0.9568380213385063, + "eval_overall_f1": 0.8073878627968336, + "eval_overall_precision": 0.7445255474452555, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.3282, + "eval_samples_per_second": 569.821, + "eval_steps_per_second": 9.142, + "step": 6996 + }, + { + "epoch": 67.0, + "grad_norm": 0.6766928434371948, + "learning_rate": 1.65e-05, + "loss": 0.0899, + "step": 7102 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.8, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7380952380952381, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.728476821192053, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6395348837209303, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8670886075949367, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8253012048192772, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7837837837837839, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7073170731707317, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13119837641716003, + "eval_overall_accuracy": 0.9599903006789525, + "eval_overall_f1": 0.8154050464807437, + "eval_overall_precision": 0.7561576354679803, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.3283, + "eval_samples_per_second": 569.589, + "eval_steps_per_second": 9.138, + "step": 7102 + }, + { + "epoch": 68.0, + "grad_norm": 0.9046867489814758, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0876, + "step": 7208 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.7692307692307694, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7058823529411765, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7189542483660131, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13087303936481476, + "eval_overall_accuracy": 0.9590203685741998, + "eval_overall_f1": 0.8100929614873836, + "eval_overall_precision": 0.7512315270935961, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.3275, + "eval_samples_per_second": 570.921, + "eval_steps_per_second": 9.159, + "step": 7208 + }, + { + "epoch": 69.0, + "grad_norm": 0.9608238935470581, + "learning_rate": 1.55e-05, + "loss": 0.0893, + "step": 7314 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.782051282051282, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7176470588235294, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.738255033557047, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6547619047619048, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.8169014084507042, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7631578947368421, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1295914202928543, + "eval_overall_accuracy": 0.960717749757517, + "eval_overall_f1": 0.8181818181818181, + "eval_overall_precision": 0.7630922693266833, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.3266, + "eval_samples_per_second": 572.512, + "eval_steps_per_second": 9.185, + "step": 7314 + }, + { + "epoch": 70.0, + "grad_norm": 0.4156774580478668, + "learning_rate": 1.5e-05, + "loss": 0.0866, + "step": 7420 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.782051282051282, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7176470588235294, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.738255033557047, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6547619047619048, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.8169014084507042, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7631578947368421, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.12943106889724731, + "eval_overall_accuracy": 0.960717749757517, + "eval_overall_f1": 0.8181818181818181, + "eval_overall_precision": 0.7630922693266833, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.3276, + "eval_samples_per_second": 570.88, + "eval_steps_per_second": 9.159, + "step": 7420 + }, + { + "epoch": 71.0, + "grad_norm": 2.0328562259674072, + "learning_rate": 1.45e-05, + "loss": 0.0891, + "step": 7526 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.7625, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6853932584269663, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.717948717948718, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6153846153846154, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8634920634920634, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8242424242424242, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13948681950569153, + "eval_overall_accuracy": 0.9556256062075654, + "eval_overall_f1": 0.8042049934296976, + "eval_overall_precision": 0.7391304347826086, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.3335, + "eval_samples_per_second": 560.739, + "eval_steps_per_second": 8.996, + "step": 7526 + }, + { + "epoch": 72.0, + "grad_norm": 1.8533878326416016, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0853, + "step": 7632 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.782051282051282, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7176470588235294, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7333333333333334, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6470588235294118, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8662420382165605, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.8169014084507042, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7631578947368421, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13026294112205505, + "eval_overall_accuracy": 0.9599903006789525, + "eval_overall_f1": 0.8155080213903744, + "eval_overall_precision": 0.7605985037406484, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.3284, + "eval_samples_per_second": 569.47, + "eval_steps_per_second": 9.136, + "step": 7632 + }, + { + "epoch": 73.0, + "grad_norm": 0.4442669451236725, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0856, + "step": 7738 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.7692307692307694, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7058823529411765, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8662420382165605, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13162143528461456, + "eval_overall_accuracy": 0.9595053346265762, + "eval_overall_f1": 0.8100929614873836, + "eval_overall_precision": 0.7512315270935961, + "eval_overall_recall": 0.8789625360230547, + "eval_runtime": 0.3325, + "eval_samples_per_second": 562.459, + "eval_steps_per_second": 9.023, + "step": 7738 + }, + { + "epoch": 74.0, + "grad_norm": 1.493489384651184, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0858, + "step": 7844 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.7870967741935485, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7261904761904762, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7346938775510204, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6585365853658537, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8690095846645367, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8343558282208589, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.12819288671016693, + "eval_overall_accuracy": 0.9604752667313288, + "eval_overall_f1": 0.8172043010752689, + "eval_overall_precision": 0.7657430730478589, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.3323, + "eval_samples_per_second": 562.673, + "eval_steps_per_second": 9.027, + "step": 7844 + }, + { + "epoch": 75.0, + "grad_norm": 0.5820181965827942, + "learning_rate": 1.25e-05, + "loss": 0.086, + "step": 7950 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.782051282051282, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7176470588235294, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7297297297297298, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6506024096385542, + "eval_ORGANIZATION_recall": 0.8307692307692308, + "eval_PERSON_f1": 0.8662420382165605, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8292682926829268, + "eval_PERSON_recall": 0.9066666666666666, + "eval_QUANTITY_f1": 0.7837837837837839, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7073170731707317, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.12972044944763184, + "eval_overall_accuracy": 0.9587778855480117, + "eval_overall_f1": 0.8117489986648865, + "eval_overall_precision": 0.7562189054726368, + "eval_overall_recall": 0.8760806916426513, + "eval_runtime": 0.3337, + "eval_samples_per_second": 560.301, + "eval_steps_per_second": 8.989, + "step": 7950 + }, + { + "epoch": 76.0, + "grad_norm": 0.7891073226928711, + "learning_rate": 1.2e-05, + "loss": 0.0866, + "step": 8056 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.7898089171974522, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7209302325581395, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.728476821192053, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6395348837209303, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13120706379413605, + "eval_overall_accuracy": 0.9590203685741998, + "eval_overall_f1": 0.8154050464807437, + "eval_overall_precision": 0.7561576354679803, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.327, + "eval_samples_per_second": 571.85, + "eval_steps_per_second": 9.174, + "step": 8056 + }, + { + "epoch": 77.0, + "grad_norm": 2.8638579845428467, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0863, + "step": 8162 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.7898089171974522, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7209302325581395, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7466666666666668, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6588235294117647, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13120390474796295, + "eval_overall_accuracy": 0.9595053346265762, + "eval_overall_f1": 0.8191489361702129, + "eval_overall_precision": 0.7604938271604939, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.3338, + "eval_samples_per_second": 560.239, + "eval_steps_per_second": 8.988, + "step": 8162 + }, + { + "epoch": 78.0, + "grad_norm": 1.1735546588897705, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0835, + "step": 8268 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.7974683544303797, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.717948717948718, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6153846153846154, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8670886075949367, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8253012048192772, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13249796628952026, + "eval_overall_accuracy": 0.9592628516003879, + "eval_overall_f1": 0.8142292490118578, + "eval_overall_precision": 0.75, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.3332, + "eval_samples_per_second": 561.208, + "eval_steps_per_second": 9.003, + "step": 8268 + }, + { + "epoch": 79.0, + "grad_norm": 0.7526395916938782, + "learning_rate": 1.05e-05, + "loss": 0.0842, + "step": 8374 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.779874213836478, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7045454545454546, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7225806451612903, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6222222222222222, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13580352067947388, + "eval_overall_accuracy": 0.9570805043646945, + "eval_overall_f1": 0.8115942028985507, + "eval_overall_precision": 0.7475728155339806, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.332, + "eval_samples_per_second": 563.222, + "eval_steps_per_second": 9.036, + "step": 8374 + }, + { + "epoch": 80.0, + "grad_norm": 1.0333696603775024, + "learning_rate": 1e-05, + "loss": 0.085, + "step": 8480 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.779874213836478, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7045454545454546, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7225806451612903, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6222222222222222, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13522081077098846, + "eval_overall_accuracy": 0.957807953443259, + "eval_overall_f1": 0.8115942028985507, + "eval_overall_precision": 0.7475728155339806, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.3337, + "eval_samples_per_second": 560.331, + "eval_steps_per_second": 8.989, + "step": 8480 + }, + { + "epoch": 81.0, + "grad_norm": 1.006616473197937, + "learning_rate": 9.5e-06, + "loss": 0.0841, + "step": 8586 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.7848101265822784, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7126436781609196, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7225806451612903, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6222222222222222, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13434429466724396, + "eval_overall_accuracy": 0.9580504364694471, + "eval_overall_f1": 0.8126649076517151, + "eval_overall_precision": 0.7493917274939172, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.3333, + "eval_samples_per_second": 560.986, + "eval_steps_per_second": 9.0, + "step": 8586 + }, + { + "epoch": 82.0, + "grad_norm": 1.522070050239563, + "learning_rate": 9e-06, + "loss": 0.0841, + "step": 8692 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.779874213836478, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7045454545454546, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7225806451612903, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6222222222222222, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8670886075949367, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8253012048192772, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1339624524116516, + "eval_overall_accuracy": 0.9580504364694471, + "eval_overall_f1": 0.8105263157894738, + "eval_overall_precision": 0.7457627118644068, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.652, + "eval_steps_per_second": 9.027, + "step": 8692 + }, + { + "epoch": 83.0, + "grad_norm": 1.2263017892837524, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0842, + "step": 8798 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.7770700636942676, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7093023255813954, + "eval_LOCATION_recall": 0.8591549295774648, + "eval_ORGANIZATION_f1": 0.7225806451612903, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6222222222222222, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13326546549797058, + "eval_overall_accuracy": 0.957807953443259, + "eval_overall_f1": 0.8110964332892999, + "eval_overall_precision": 0.748780487804878, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.3331, + "eval_samples_per_second": 561.393, + "eval_steps_per_second": 9.006, + "step": 8798 + }, + { + "epoch": 84.0, + "grad_norm": 1.2086056470870972, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0836, + "step": 8904 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.7848101265822784, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7126436781609196, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7225806451612903, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6222222222222222, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13417989015579224, + "eval_overall_accuracy": 0.9580504364694471, + "eval_overall_f1": 0.8126649076517151, + "eval_overall_precision": 0.7493917274939172, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.3322, + "eval_samples_per_second": 562.869, + "eval_steps_per_second": 9.03, + "step": 8904 + }, + { + "epoch": 85.0, + "grad_norm": 1.9969497919082642, + "learning_rate": 7.5e-06, + "loss": 0.0838, + "step": 9010 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.759493670886076, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.6896551724137931, + "eval_LOCATION_recall": 0.8450704225352113, + "eval_ORGANIZATION_f1": 0.717948717948718, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6153846153846154, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13468340039253235, + "eval_overall_accuracy": 0.9573229873908826, + "eval_overall_f1": 0.8073878627968336, + "eval_overall_precision": 0.7445255474452555, + "eval_overall_recall": 0.8818443804034583, + "eval_runtime": 0.3263, + "eval_samples_per_second": 573.126, + "eval_steps_per_second": 9.195, + "step": 9010 + }, + { + "epoch": 86.0, + "grad_norm": 0.8308915495872498, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0827, + "step": 9116 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.7898089171974522, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7209302325581395, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7466666666666668, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6588235294117647, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13272897899150848, + "eval_overall_accuracy": 0.9602327837051406, + "eval_overall_f1": 0.8202396804260986, + "eval_overall_precision": 0.7623762376237624, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.3338, + "eval_samples_per_second": 560.231, + "eval_steps_per_second": 8.988, + "step": 9116 + }, + { + "epoch": 87.0, + "grad_norm": 0.6697033047676086, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0821, + "step": 9222 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.7848101265822784, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7126436781609196, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.8055555555555556, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.7435897435897436, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1330462396144867, + "eval_overall_accuracy": 0.9595053346265762, + "eval_overall_f1": 0.8158940397350992, + "eval_overall_precision": 0.7549019607843137, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.3335, + "eval_samples_per_second": 560.656, + "eval_steps_per_second": 8.994, + "step": 9222 + }, + { + "epoch": 88.0, + "grad_norm": 0.6615722179412842, + "learning_rate": 6e-06, + "loss": 0.0817, + "step": 9328 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.7974683544303797, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7368421052631579, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6436781609195402, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8726114649681529, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13280968368053436, + "eval_overall_accuracy": 0.9599903006789525, + "eval_overall_f1": 0.8196286472148541, + "eval_overall_precision": 0.7592137592137592, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.3328, + "eval_samples_per_second": 561.926, + "eval_steps_per_second": 9.015, + "step": 9328 + }, + { + "epoch": 89.0, + "grad_norm": 1.8761165142059326, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0827, + "step": 9434 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.7974683544303797, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7272727272727273, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6292134831460674, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1335485428571701, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8163804491413474, + "eval_overall_precision": 0.7536585365853659, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.327, + "eval_samples_per_second": 571.815, + "eval_steps_per_second": 9.174, + "step": 9434 + }, + { + "epoch": 90.0, + "grad_norm": 1.4955438375473022, + "learning_rate": 5e-06, + "loss": 0.083, + "step": 9540 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.7974683544303797, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8726114649681529, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1331973373889923, + "eval_overall_accuracy": 0.9597478176527643, + "eval_overall_f1": 0.8185430463576159, + "eval_overall_precision": 0.7573529411764706, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.3266, + "eval_samples_per_second": 572.598, + "eval_steps_per_second": 9.186, + "step": 9540 + }, + { + "epoch": 91.0, + "grad_norm": 1.4503899812698364, + "learning_rate": 4.5e-06, + "loss": 0.0833, + "step": 9646 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.7848101265822784, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7126436781609196, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.717948717948718, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6153846153846154, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1346951723098755, + "eval_overall_accuracy": 0.9575654704170709, + "eval_overall_f1": 0.8115942028985507, + "eval_overall_precision": 0.7475728155339806, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.3263, + "eval_samples_per_second": 573.108, + "eval_steps_per_second": 9.194, + "step": 9646 + }, + { + "epoch": 92.0, + "grad_norm": 1.369122862815857, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0818, + "step": 9752 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.7974683544303797, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7189542483660131, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.625, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8726114649681529, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1325005143880844, + "eval_overall_accuracy": 0.9599903006789525, + "eval_overall_f1": 0.8158940397350992, + "eval_overall_precision": 0.7549019607843137, + "eval_overall_recall": 0.8876080691642652, + "eval_runtime": 0.3264, + "eval_samples_per_second": 572.986, + "eval_steps_per_second": 9.192, + "step": 9752 + }, + { + "epoch": 93.0, + "grad_norm": 0.8854653835296631, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0807, + "step": 9858 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.7898089171974522, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7209302325581395, + "eval_LOCATION_recall": 0.8732394366197183, + "eval_ORGANIZATION_f1": 0.7236842105263158, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.632183908045977, + "eval_ORGANIZATION_recall": 0.8461538461538461, + "eval_PERSON_f1": 0.8726114649681529, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8353658536585366, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13171027600765228, + "eval_overall_accuracy": 0.9595053346265762, + "eval_overall_f1": 0.8154050464807437, + "eval_overall_precision": 0.7561576354679803, + "eval_overall_recall": 0.8847262247838616, + "eval_runtime": 0.3279, + "eval_samples_per_second": 570.285, + "eval_steps_per_second": 9.149, + "step": 9858 + }, + { + "epoch": 94.0, + "grad_norm": 1.5826424360275269, + "learning_rate": 3e-06, + "loss": 0.0814, + "step": 9964 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.7924528301886792, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7159090909090909, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.133773535490036, + "eval_overall_accuracy": 0.9585354025218235, + "eval_overall_f1": 0.8163804491413474, + "eval_overall_precision": 0.7536585365853659, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.3272, + "eval_samples_per_second": 571.508, + "eval_steps_per_second": 9.169, + "step": 9964 + }, + { + "epoch": 95.0, + "grad_norm": 1.2067687511444092, + "learning_rate": 2.5e-06, + "loss": 0.0826, + "step": 10070 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.7974683544303797, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13358350098133087, + "eval_overall_accuracy": 0.9590203685741998, + "eval_overall_f1": 0.8174603174603176, + "eval_overall_precision": 0.7555012224938875, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.3276, + "eval_samples_per_second": 570.746, + "eval_steps_per_second": 9.156, + "step": 10070 + }, + { + "epoch": 96.0, + "grad_norm": 0.7109037637710571, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0816, + "step": 10176 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.7924528301886792, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7159090909090909, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13357478380203247, + "eval_overall_accuracy": 0.9587778855480117, + "eval_overall_f1": 0.8163804491413474, + "eval_overall_precision": 0.7536585365853659, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.329, + "eval_samples_per_second": 568.364, + "eval_steps_per_second": 9.118, + "step": 10176 + }, + { + "epoch": 97.0, + "grad_norm": 1.166966438293457, + "learning_rate": 1.5e-06, + "loss": 0.083, + "step": 10282 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.7924528301886792, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7159090909090909, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13330641388893127, + "eval_overall_accuracy": 0.9592628516003879, + "eval_overall_f1": 0.8163804491413474, + "eval_overall_precision": 0.7536585365853659, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.3324, + "eval_samples_per_second": 562.52, + "eval_steps_per_second": 9.024, + "step": 10282 + }, + { + "epoch": 98.0, + "grad_norm": 1.0665535926818848, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.082, + "step": 10388 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.7974683544303797, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13307832181453705, + "eval_overall_accuracy": 0.9595053346265762, + "eval_overall_f1": 0.8174603174603176, + "eval_overall_precision": 0.7555012224938875, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.3263, + "eval_samples_per_second": 573.175, + "eval_steps_per_second": 9.195, + "step": 10388 + }, + { + "epoch": 99.0, + "grad_norm": 1.6573951244354248, + "learning_rate": 5.000000000000001e-07, + "loss": 0.0804, + "step": 10494 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.7924528301886792, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7159090909090909, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.13340671360492706, + "eval_overall_accuracy": 0.9592628516003879, + "eval_overall_f1": 0.8163804491413474, + "eval_overall_precision": 0.7536585365853659, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.3273, + "eval_samples_per_second": 571.307, + "eval_steps_per_second": 9.165, + "step": 10494 + }, + { + "epoch": 100.0, + "grad_norm": 0.5998202562332153, + "learning_rate": 0.0, + "loss": 0.0806, + "step": 10600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.7974683544303797, + "eval_LOCATION_number": 71, + "eval_LOCATION_precision": 0.7241379310344828, + "eval_LOCATION_recall": 0.8873239436619719, + "eval_ORGANIZATION_f1": 0.7320261437908497, + "eval_ORGANIZATION_number": 65, + "eval_ORGANIZATION_precision": 0.6363636363636364, + "eval_ORGANIZATION_recall": 0.8615384615384616, + "eval_PERSON_f1": 0.8698412698412699, + "eval_PERSON_number": 150, + "eval_PERSON_precision": 0.8303030303030303, + "eval_PERSON_recall": 0.9133333333333333, + "eval_QUANTITY_f1": 0.7945205479452054, + "eval_QUANTITY_number": 33, + "eval_QUANTITY_precision": 0.725, + "eval_QUANTITY_recall": 0.8787878787878788, + "eval_TIME_f1": 0.8421052631578947, + "eval_TIME_number": 28, + "eval_TIME_precision": 0.8275862068965517, + "eval_TIME_recall": 0.8571428571428571, + "eval_loss": 0.1330995410680771, + "eval_overall_accuracy": 0.9595053346265762, + "eval_overall_f1": 0.8174603174603176, + "eval_overall_precision": 0.7555012224938875, + "eval_overall_recall": 0.8904899135446686, + "eval_runtime": 0.3284, + "eval_samples_per_second": 569.496, + "eval_steps_per_second": 9.136, + "step": 10600 + }, + { + "epoch": 100.0, + "step": 10600, + "total_flos": 4604018189212752.0, + "train_loss": 0.14351777022739626, + "train_runtime": 583.4317, + "train_samples_per_second": 289.323, + "train_steps_per_second": 18.168 + } + ], + "logging_steps": 500, + "max_steps": 10600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 4604018189212752.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}