diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "eval_steps": 500, + "global_step": 9600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "grad_norm": 3.177828550338745, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.2635, + "step": 96 + }, + { + "epoch": 1.0, + "eval_LOCATION_f1": 0.8866995073891626, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.8716417910447761, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.863905325443787, + "eval_ORGANIZATION_recall": 0.8795180722891566, + "eval_PERSON_f1": 0.9614035087719298, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.958041958041958, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.06304211914539337, + "eval_overall_accuracy": 0.9813443072702333, + "eval_overall_f1": 0.9064398541919806, + "eval_overall_precision": 0.8838862559241706, + "eval_overall_recall": 0.9301745635910225, + "eval_runtime": 0.6026, + "eval_samples_per_second": 282.106, + "eval_steps_per_second": 4.978, + "step": 96 + }, + { + "epoch": 2.0, + "grad_norm": 2.5327234268188477, + "learning_rate": 4.9e-05, + "loss": 0.0557, + "step": 192 + }, + { + "epoch": 2.0, + "eval_LOCATION_f1": 0.8866995073891626, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8181818181818182, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.8800000000000001, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.89937106918239, + "eval_ORGANIZATION_recall": 0.8614457831325302, + "eval_PERSON_f1": 0.9681978798586572, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.060067467391490936, + "eval_overall_accuracy": 0.9813443072702333, + "eval_overall_f1": 0.9124537607891493, + "eval_overall_precision": 0.9024390243902439, + "eval_overall_recall": 0.9226932668329177, + "eval_runtime": 0.6458, + "eval_samples_per_second": 263.24, + "eval_steps_per_second": 4.645, + "step": 192 + }, + { + "epoch": 3.0, + "grad_norm": 0.6971762180328369, + "learning_rate": 4.85e-05, + "loss": 0.0324, + "step": 288 + }, + { + "epoch": 3.0, + "eval_LOCATION_f1": 0.9361702127659575, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9263157894736842, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9096209912536444, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.8813559322033898, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.058206915855407715, + "eval_overall_accuracy": 0.9865569272976681, + "eval_overall_f1": 0.9385749385749386, + "eval_overall_precision": 0.9249394673123487, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6242, + "eval_samples_per_second": 272.341, + "eval_steps_per_second": 4.806, + "step": 288 + }, + { + "epoch": 4.0, + "grad_norm": 3.045093297958374, + "learning_rate": 4.8e-05, + "loss": 0.0198, + "step": 384 + }, + { + "epoch": 4.0, + "eval_LOCATION_f1": 0.9120879120879121, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9325842696629213, + "eval_LOCATION_recall": 0.8924731182795699, + "eval_ORGANIZATION_f1": 0.9144542772861357, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.8959537572254336, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06479041278362274, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9353233830845771, + "eval_overall_precision": 0.9330024813895782, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.6071, + "eval_samples_per_second": 280.017, + "eval_steps_per_second": 4.941, + "step": 384 + }, + { + "epoch": 5.0, + "grad_norm": 4.877044677734375, + "learning_rate": 4.75e-05, + "loss": 0.0138, + "step": 480 + }, + { + "epoch": 5.0, + "eval_LOCATION_f1": 0.9326424870466321, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9085365853658537, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9197530864197531, + "eval_ORGANIZATION_recall": 0.8975903614457831, + "eval_PERSON_f1": 0.9681978798586572, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.06161973997950554, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9353233830845771, + "eval_overall_precision": 0.9330024813895782, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.6091, + "eval_samples_per_second": 279.082, + "eval_steps_per_second": 4.925, + "step": 480 + }, + { + "epoch": 6.0, + "grad_norm": 0.2726280689239502, + "learning_rate": 4.7e-05, + "loss": 0.0105, + "step": 576 + }, + { + "epoch": 6.0, + "eval_LOCATION_f1": 0.9175257731958764, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8811881188118812, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.8909657320872274, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9225806451612903, + "eval_ORGANIZATION_recall": 0.8614457831325302, + "eval_PERSON_f1": 0.9754385964912281, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.07424049079418182, + "eval_overall_accuracy": 0.9860082304526749, + "eval_overall_f1": 0.9275, + "eval_overall_precision": 0.9298245614035088, + "eval_overall_recall": 0.9251870324189526, + "eval_runtime": 0.62, + "eval_samples_per_second": 274.215, + "eval_steps_per_second": 4.839, + "step": 576 + }, + { + "epoch": 7.0, + "grad_norm": 0.07776086032390594, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.0057, + "step": 672 + }, + { + "epoch": 7.0, + "eval_LOCATION_f1": 0.9361702127659575, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9263157894736842, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9337349397590361, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9337349397590361, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9787234042553192, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.07283920794725418, + "eval_overall_accuracy": 0.9887517146776406, + "eval_overall_f1": 0.9501246882793017, + "eval_overall_precision": 0.9501246882793017, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6865, + "eval_samples_per_second": 247.643, + "eval_steps_per_second": 4.37, + "step": 672 + }, + { + "epoch": 8.0, + "grad_norm": 0.04480782151222229, + "learning_rate": 4.600000000000001e-05, + "loss": 0.0047, + "step": 768 + }, + { + "epoch": 8.0, + "eval_LOCATION_f1": 0.896551724137931, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8272727272727273, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.8924050632911392, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.94, + "eval_ORGANIZATION_recall": 0.8493975903614458, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08292285352945328, + "eval_overall_accuracy": 0.9840877914951989, + "eval_overall_f1": 0.9240348692403486, + "eval_overall_precision": 0.9228855721393034, + "eval_overall_recall": 0.9251870324189526, + "eval_runtime": 0.6102, + "eval_samples_per_second": 278.593, + "eval_steps_per_second": 4.916, + "step": 768 + }, + { + "epoch": 9.0, + "grad_norm": 0.014586797915399075, + "learning_rate": 4.55e-05, + "loss": 0.0054, + "step": 864 + }, + { + "epoch": 9.0, + "eval_LOCATION_f1": 0.9191919191919192, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8666666666666667, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9022082018927445, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9470198675496688, + "eval_ORGANIZATION_recall": 0.8614457831325302, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08383848518133163, + "eval_overall_accuracy": 0.9862825788751715, + "eval_overall_f1": 0.9348370927318296, + "eval_overall_precision": 0.9395465994962217, + "eval_overall_recall": 0.9301745635910225, + "eval_runtime": 0.6187, + "eval_samples_per_second": 274.757, + "eval_steps_per_second": 4.849, + "step": 864 + }, + { + "epoch": 10.0, + "grad_norm": 1.5230215787887573, + "learning_rate": 4.5e-05, + "loss": 0.0034, + "step": 960 + }, + { + "epoch": 10.0, + "eval_LOCATION_f1": 0.9374999999999999, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9090909090909091, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9074626865671641, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.8994082840236687, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.0742967277765274, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9382716049382714, + "eval_overall_precision": 0.9290953545232273, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.6386, + "eval_samples_per_second": 266.218, + "eval_steps_per_second": 4.698, + "step": 960 + }, + { + "epoch": 11.0, + "grad_norm": 0.017022985965013504, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.0034, + "step": 1056 + }, + { + "epoch": 11.0, + "eval_LOCATION_f1": 0.9479166666666667, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9096385542168675, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9096385542168675, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.07502178102731705, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9442379182156134, + "eval_overall_precision": 0.9384236453201971, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6164, + "eval_samples_per_second": 275.788, + "eval_steps_per_second": 4.867, + "step": 1056 + }, + { + "epoch": 12.0, + "grad_norm": 0.002780010225251317, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.0032, + "step": 1152 + }, + { + "epoch": 12.0, + "eval_LOCATION_f1": 0.9319371727748691, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9081632653061225, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.923076923076923, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9069767441860465, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9750889679715302, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9856115107913669, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.0745384469628334, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9432098765432098, + "eval_overall_precision": 0.9339853300733496, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6081, + "eval_samples_per_second": 279.572, + "eval_steps_per_second": 4.934, + "step": 1152 + }, + { + "epoch": 13.0, + "grad_norm": 0.019220953807234764, + "learning_rate": 4.35e-05, + "loss": 0.003, + "step": 1248 + }, + { + "epoch": 13.0, + "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9175257731958762, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9681978798586572, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.0791519358754158, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9501246882793017, + "eval_overall_precision": 0.9501246882793017, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6266, + "eval_samples_per_second": 271.319, + "eval_steps_per_second": 4.788, + "step": 1248 + }, + { + "epoch": 14.0, + "grad_norm": 2.768113374710083, + "learning_rate": 4.3e-05, + "loss": 0.0027, + "step": 1344 + }, + { + "epoch": 14.0, + "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9270833333333334, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9329268292682927, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9716312056737589, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9785714285714285, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.07483148574829102, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.9486858573216519, + "eval_overall_precision": 0.9522613065326633, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.6338, + "eval_samples_per_second": 268.226, + "eval_steps_per_second": 4.733, + "step": 1344 + }, + { + "epoch": 15.0, + "grad_norm": 1.6159311532974243, + "learning_rate": 4.25e-05, + "loss": 0.0025, + "step": 1440 + }, + { + "epoch": 15.0, + "eval_LOCATION_f1": 0.9319371727748691, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9081632653061225, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9309309309309309, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9281437125748503, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9716312056737589, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9785714285714285, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.07695868611335754, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.9454094292803972, + "eval_overall_precision": 0.9407407407407408, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6119, + "eval_samples_per_second": 277.812, + "eval_steps_per_second": 4.903, + "step": 1440 + }, + { + "epoch": 16.0, + "grad_norm": 3.242793321609497, + "learning_rate": 4.2e-05, + "loss": 0.0034, + "step": 1536 + }, + { + "epoch": 16.0, + "eval_LOCATION_f1": 0.923076923076923, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8823529411764706, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9158878504672897, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9483870967741935, + "eval_ORGANIZATION_recall": 0.8855421686746988, + "eval_PERSON_f1": 0.9477351916376306, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9379310344827586, + "eval_PERSON_recall": 0.9577464788732394, + "eval_loss": 0.08768688887357712, + "eval_overall_accuracy": 0.9860082304526749, + "eval_overall_f1": 0.9290161892901619, + "eval_overall_precision": 0.927860696517413, + "eval_overall_recall": 0.9301745635910225, + "eval_runtime": 0.6115, + "eval_samples_per_second": 278.015, + "eval_steps_per_second": 4.906, + "step": 1536 + }, + { + "epoch": 17.0, + "grad_norm": 0.27599668502807617, + "learning_rate": 4.15e-05, + "loss": 0.0026, + "step": 1632 + }, + { + "epoch": 17.0, + "eval_LOCATION_f1": 0.9312169312169313, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9166666666666666, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9179331306990882, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9263803680981595, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9750889679715302, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9856115107913669, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.09132808446884155, + "eval_overall_accuracy": 0.9865569272976681, + "eval_overall_f1": 0.9411764705882354, + "eval_overall_precision": 0.9447236180904522, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.6255, + "eval_samples_per_second": 271.769, + "eval_steps_per_second": 4.796, + "step": 1632 + }, + { + "epoch": 18.0, + "grad_norm": 0.01839255355298519, + "learning_rate": 4.1e-05, + "loss": 0.0036, + "step": 1728 + }, + { + "epoch": 18.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9240121580547112, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9325153374233128, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9580419580419581, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9513888888888888, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.09819579124450684, + "eval_overall_accuracy": 0.9865569272976681, + "eval_overall_f1": 0.9416149068322982, + "eval_overall_precision": 0.9381188118811881, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.6201, + "eval_samples_per_second": 274.14, + "eval_steps_per_second": 4.838, + "step": 1728 + }, + { + "epoch": 19.0, + "grad_norm": 0.0038679959252476692, + "learning_rate": 4.05e-05, + "loss": 0.0048, + "step": 1824 + }, + { + "epoch": 19.0, + "eval_LOCATION_f1": 0.9479166666666667, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.927899686520376, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9673202614379085, + "eval_ORGANIZATION_recall": 0.891566265060241, + "eval_PERSON_f1": 0.971830985915493, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.0962822213768959, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9484276729559747, + "eval_overall_precision": 0.9568527918781726, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.6256, + "eval_samples_per_second": 271.751, + "eval_steps_per_second": 4.796, + "step": 1824 + }, + { + "epoch": 20.0, + "grad_norm": 0.003253211732953787, + "learning_rate": 4e-05, + "loss": 0.003, + "step": 1920 + }, + { + "epoch": 20.0, + "eval_LOCATION_f1": 0.914572864321608, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8584905660377359, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.915360501567398, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.954248366013072, + "eval_ORGANIZATION_recall": 0.8795180722891566, + "eval_PERSON_f1": 0.9754385964912281, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.07391735166311264, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.9364881693648817, + "eval_overall_precision": 0.9353233830845771, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.695, + "eval_samples_per_second": 244.601, + "eval_steps_per_second": 4.316, + "step": 1920 + }, + { + "epoch": 21.0, + "grad_norm": 0.2520816922187805, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.0014, + "step": 2016 + }, + { + "epoch": 21.0, + "eval_LOCATION_f1": 0.909090909090909, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8571428571428571, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9292307692307692, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.949685534591195, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.09125908464193344, + "eval_overall_accuracy": 0.9865569272976681, + "eval_overall_f1": 0.9404466501240694, + "eval_overall_precision": 0.9358024691358025, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.6133, + "eval_samples_per_second": 277.187, + "eval_steps_per_second": 4.892, + "step": 2016 + }, + { + "epoch": 22.0, + "grad_norm": 0.0016751234652474523, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.0013, + "step": 2112 + }, + { + "epoch": 22.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9636363636363636, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9695121951219512, + "eval_ORGANIZATION_recall": 0.9578313253012049, + "eval_PERSON_f1": 0.9787234042553192, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.07907005399465561, + "eval_overall_accuracy": 0.9912208504801098, + "eval_overall_f1": 0.9662921348314607, + "eval_overall_precision": 0.9675, + "eval_overall_recall": 0.9650872817955112, + "eval_runtime": 0.6284, + "eval_samples_per_second": 270.511, + "eval_steps_per_second": 4.774, + "step": 2112 + }, + { + "epoch": 23.0, + "grad_norm": 0.0011018295772373676, + "learning_rate": 3.85e-05, + "loss": 0.0028, + "step": 2208 + }, + { + "epoch": 23.0, + "eval_LOCATION_f1": 0.9326424870466321, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9382716049382716, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9620253164556962, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9750889679715302, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9856115107913669, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.08638212084770203, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.949874686716792, + "eval_overall_precision": 0.9546599496221663, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.6091, + "eval_samples_per_second": 279.107, + "eval_steps_per_second": 4.925, + "step": 2208 + }, + { + "epoch": 24.0, + "grad_norm": 0.3309096395969391, + "learning_rate": 3.8e-05, + "loss": 0.0019, + "step": 2304 + }, + { + "epoch": 24.0, + "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9270833333333334, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9390243902439024, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9506172839506173, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.0859411209821701, + "eval_overall_accuracy": 0.9898491083676269, + "eval_overall_f1": 0.955, + "eval_overall_precision": 0.9573934837092731, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6237, + "eval_samples_per_second": 272.548, + "eval_steps_per_second": 4.81, + "step": 2304 + }, + { + "epoch": 25.0, + "grad_norm": 1.852352261543274, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0021, + "step": 2400 + }, + { + "epoch": 25.0, + "eval_LOCATION_f1": 0.934065934065934, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9550561797752809, + "eval_LOCATION_recall": 0.9139784946236559, + "eval_ORGANIZATION_f1": 0.9425981873111783, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9454545454545454, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.971830985915493, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.08199941366910934, + "eval_overall_accuracy": 0.9887517146776406, + "eval_overall_f1": 0.9510664993726474, + "eval_overall_precision": 0.9570707070707071, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.616, + "eval_samples_per_second": 275.97, + "eval_steps_per_second": 4.87, + "step": 2400 + }, + { + "epoch": 26.0, + "grad_norm": 0.2104710191488266, + "learning_rate": 3.7e-05, + "loss": 0.0031, + "step": 2496 + }, + { + "epoch": 26.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9183673469387755, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9249999999999999, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.961038961038961, + "eval_ORGANIZATION_recall": 0.891566265060241, + "eval_PERSON_f1": 0.9679715302491103, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9784172661870504, + "eval_PERSON_recall": 0.9577464788732394, + "eval_loss": 0.1160167008638382, + "eval_overall_accuracy": 0.9851851851851852, + "eval_overall_f1": 0.9444444444444444, + "eval_overall_precision": 0.9565217391304348, + "eval_overall_recall": 0.9326683291770573, + "eval_runtime": 0.6243, + "eval_samples_per_second": 272.302, + "eval_steps_per_second": 4.805, + "step": 2496 + }, + { + "epoch": 27.0, + "grad_norm": 0.2863217294216156, + "learning_rate": 3.65e-05, + "loss": 0.0024, + "step": 2592 + }, + { + "epoch": 27.0, + "eval_LOCATION_f1": 0.9312169312169313, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9166666666666666, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9386503067484663, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.95625, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.965034965034965, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9583333333333334, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.0788722112774849, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.9463171036204744, + "eval_overall_precision": 0.9475, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.6209, + "eval_samples_per_second": 273.783, + "eval_steps_per_second": 4.831, + "step": 2592 + }, + { + "epoch": 28.0, + "grad_norm": 0.0017773299477994442, + "learning_rate": 3.6e-05, + "loss": 0.003, + "step": 2688 + }, + { + "epoch": 28.0, + "eval_LOCATION_f1": 0.9361702127659575, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9263157894736842, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9483282674772037, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9570552147239264, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9787234042553192, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.078119195997715, + "eval_overall_accuracy": 0.9895747599451303, + "eval_overall_f1": 0.9561952440550688, + "eval_overall_precision": 0.9597989949748744, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.622, + "eval_samples_per_second": 273.315, + "eval_steps_per_second": 4.823, + "step": 2688 + }, + { + "epoch": 29.0, + "grad_norm": 0.01796448975801468, + "learning_rate": 3.55e-05, + "loss": 0.0024, + "step": 2784 + }, + { + "epoch": 29.0, + "eval_LOCATION_f1": 0.9090909090909091, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9042553191489362, + "eval_LOCATION_recall": 0.9139784946236559, + "eval_ORGANIZATION_f1": 0.9518072289156626, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9518072289156626, + "eval_ORGANIZATION_recall": 0.9518072289156626, + "eval_PERSON_f1": 0.9716312056737589, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9785714285714285, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.082966648042202, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9488139825218477, + "eval_overall_precision": 0.95, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.6122, + "eval_samples_per_second": 277.689, + "eval_steps_per_second": 4.9, + "step": 2784 + }, + { + "epoch": 30.0, + "grad_norm": 0.01182488538324833, + "learning_rate": 3.5e-05, + "loss": 0.0017, + "step": 2880 + }, + { + "epoch": 30.0, + "eval_LOCATION_f1": 0.9072164948453608, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8712871287128713, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9357798165137615, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9503105590062112, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9787234042553192, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.10598521679639816, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9439601494396015, + "eval_overall_precision": 0.9427860696517413, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.6131, + "eval_samples_per_second": 277.279, + "eval_steps_per_second": 4.893, + "step": 2880 + }, + { + "epoch": 31.0, + "grad_norm": 0.0008316845633089542, + "learning_rate": 3.45e-05, + "loss": 0.0013, + "step": 2976 + }, + { + "epoch": 31.0, + "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9270833333333334, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9418960244648319, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9565217391304348, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9647887323943662, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9647887323943662, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.0891905203461647, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.95, + "eval_overall_precision": 0.9523809523809523, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.6347, + "eval_samples_per_second": 267.83, + "eval_steps_per_second": 4.726, + "step": 2976 + }, + { + "epoch": 32.0, + "grad_norm": 0.004454879555851221, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.0013, + "step": 3072 + }, + { + "epoch": 32.0, + "eval_LOCATION_f1": 0.9333333333333333, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8921568627450981, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9249999999999999, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.961038961038961, + "eval_ORGANIZATION_recall": 0.891566265060241, + "eval_PERSON_f1": 0.9616724738675958, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9517241379310345, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.12060756981372833, + "eval_overall_accuracy": 0.9849108367626886, + "eval_overall_f1": 0.940149625935162, + "eval_overall_precision": 0.940149625935162, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.6168, + "eval_samples_per_second": 275.635, + "eval_steps_per_second": 4.864, + "step": 3072 + }, + { + "epoch": 33.0, + "grad_norm": 1.9620002508163452, + "learning_rate": 3.35e-05, + "loss": 0.0029, + "step": 3168 + }, + { + "epoch": 33.0, + "eval_LOCATION_f1": 0.925531914893617, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9157894736842105, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9486404833836858, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9515151515151515, + "eval_ORGANIZATION_recall": 0.9457831325301205, + "eval_PERSON_f1": 0.9679715302491103, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9784172661870504, + "eval_PERSON_recall": 0.9577464788732394, + "eval_loss": 0.08239692449569702, + "eval_overall_accuracy": 0.9901234567901235, + "eval_overall_f1": 0.95, + "eval_overall_precision": 0.9523809523809523, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.6192, + "eval_samples_per_second": 274.561, + "eval_steps_per_second": 4.845, + "step": 3168 + }, + { + "epoch": 34.0, + "grad_norm": 8.319902420043945, + "learning_rate": 3.3e-05, + "loss": 0.0024, + "step": 3264 + }, + { + "epoch": 34.0, + "eval_LOCATION_f1": 0.922279792746114, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.89, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9483282674772037, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9570552147239264, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9679715302491103, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9784172661870504, + "eval_PERSON_recall": 0.9577464788732394, + "eval_loss": 0.08592518419027328, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9489414694894147, + "eval_overall_precision": 0.9477611940298507, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6116, + "eval_samples_per_second": 277.948, + "eval_steps_per_second": 4.905, + "step": 3264 + }, + { + "epoch": 35.0, + "grad_norm": 0.0038952461909502745, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0008, + "step": 3360 + }, + { + "epoch": 35.0, + "eval_LOCATION_f1": 0.9374999999999999, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9090909090909091, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9575757575757576, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9634146341463414, + "eval_ORGANIZATION_recall": 0.9518072289156626, + "eval_PERSON_f1": 0.9645390070921985, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9714285714285714, + "eval_PERSON_recall": 0.9577464788732394, + "eval_loss": 0.0747222974896431, + "eval_overall_accuracy": 0.9917695473251029, + "eval_overall_f1": 0.9552238805970149, + "eval_overall_precision": 0.9528535980148883, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6345, + "eval_samples_per_second": 267.913, + "eval_steps_per_second": 4.728, + "step": 3360 + }, + { + "epoch": 36.0, + "grad_norm": 0.0016899339389055967, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.0007, + "step": 3456 + }, + { + "epoch": 36.0, + "eval_LOCATION_f1": 0.9518716577540107, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9468085106382979, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.966565349544073, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9754601226993865, + "eval_ORGANIZATION_recall": 0.9578313253012049, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.07771290838718414, + "eval_overall_accuracy": 0.9920438957475994, + "eval_overall_f1": 0.9662077596996246, + "eval_overall_precision": 0.9698492462311558, + "eval_overall_recall": 0.9625935162094763, + "eval_runtime": 0.6791, + "eval_samples_per_second": 250.319, + "eval_steps_per_second": 4.417, + "step": 3456 + }, + { + "epoch": 37.0, + "grad_norm": 0.08993358165025711, + "learning_rate": 3.15e-05, + "loss": 0.0012, + "step": 3552 + }, + { + "epoch": 37.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9573170731707317, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9691358024691358, + "eval_ORGANIZATION_recall": 0.9457831325301205, + "eval_PERSON_f1": 0.9750889679715302, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9856115107913669, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.08484144508838654, + "eval_overall_accuracy": 0.9909465020576131, + "eval_overall_f1": 0.9612015018773465, + "eval_overall_precision": 0.964824120603015, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6173, + "eval_samples_per_second": 275.378, + "eval_steps_per_second": 4.86, + "step": 3552 + }, + { + "epoch": 38.0, + "grad_norm": 0.004345588851720095, + "learning_rate": 3.1e-05, + "loss": 0.0017, + "step": 3648 + }, + { + "epoch": 38.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9631901840490796, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.98125, + "eval_ORGANIZATION_recall": 0.9457831325301205, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.0790277048945427, + "eval_overall_accuracy": 0.9917695473251029, + "eval_overall_f1": 0.9674185463659148, + "eval_overall_precision": 0.9722921914357683, + "eval_overall_recall": 0.9625935162094763, + "eval_runtime": 0.6178, + "eval_samples_per_second": 275.165, + "eval_steps_per_second": 4.856, + "step": 3648 + }, + { + "epoch": 39.0, + "grad_norm": 0.003032927168533206, + "learning_rate": 3.05e-05, + "loss": 0.0014, + "step": 3744 + }, + { + "epoch": 39.0, + "eval_LOCATION_f1": 0.9326424870466321, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.918429003021148, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9212121212121213, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08661574125289917, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9442379182156134, + "eval_overall_precision": 0.9384236453201971, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6256, + "eval_samples_per_second": 271.73, + "eval_steps_per_second": 4.795, + "step": 3744 + }, + { + "epoch": 40.0, + "grad_norm": 0.0013004345819354057, + "learning_rate": 3e-05, + "loss": 0.0011, + "step": 3840 + }, + { + "epoch": 40.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9353846153846154, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9559748427672956, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08830577880144119, + "eval_overall_accuracy": 0.9901234567901235, + "eval_overall_f1": 0.9560853199498118, + "eval_overall_precision": 0.9621212121212122, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6147, + "eval_samples_per_second": 276.564, + "eval_steps_per_second": 4.881, + "step": 3840 + }, + { + "epoch": 41.0, + "grad_norm": 0.0013941420475021005, + "learning_rate": 2.95e-05, + "loss": 0.0012, + "step": 3936 + }, + { + "epoch": 41.0, + "eval_LOCATION_f1": 0.9285714285714286, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.883495145631068, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.920245398773006, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9375, + "eval_ORGANIZATION_recall": 0.9036144578313253, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.08596080541610718, + "eval_overall_accuracy": 0.9901234567901235, + "eval_overall_f1": 0.9416149068322982, + "eval_overall_precision": 0.9381188118811881, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.6179, + "eval_samples_per_second": 275.122, + "eval_steps_per_second": 4.855, + "step": 3936 + }, + { + "epoch": 42.0, + "grad_norm": 4.352799415588379, + "learning_rate": 2.9e-05, + "loss": 0.0009, + "step": 4032 + }, + { + "epoch": 42.0, + "eval_LOCATION_f1": 0.9411764705882354, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9361702127659575, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9512195121951219, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9629629629629629, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.07902759313583374, + "eval_overall_accuracy": 0.9909465020576131, + "eval_overall_f1": 0.9586983729662076, + "eval_overall_precision": 0.9623115577889447, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6298, + "eval_samples_per_second": 269.919, + "eval_steps_per_second": 4.763, + "step": 4032 + }, + { + "epoch": 43.0, + "grad_norm": 0.0006095858407206833, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.0008, + "step": 4128 + }, + { + "epoch": 43.0, + "eval_LOCATION_f1": 0.956989247311828, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.956989247311828, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9285714285714286, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9176470588235294, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08305903524160385, + "eval_overall_accuracy": 0.99039780521262, + "eval_overall_f1": 0.9540372670807453, + "eval_overall_precision": 0.9504950495049505, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6152, + "eval_samples_per_second": 276.336, + "eval_steps_per_second": 4.877, + "step": 4128 + }, + { + "epoch": 44.0, + "grad_norm": 0.0003273843030910939, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.0015, + "step": 4224 + }, + { + "epoch": 44.0, + "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9270833333333334, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9393939393939394, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9451219512195121, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9716312056737589, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9785714285714285, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.0857364758849144, + "eval_overall_accuracy": 0.99039780521262, + "eval_overall_f1": 0.951310861423221, + "eval_overall_precision": 0.9525, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.7302, + "eval_samples_per_second": 232.812, + "eval_steps_per_second": 4.108, + "step": 4224 + }, + { + "epoch": 45.0, + "grad_norm": 0.0011686523212119937, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.0009, + "step": 4320 + }, + { + "epoch": 45.0, + "eval_LOCATION_f1": 0.9312169312169313, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9166666666666666, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9483282674772037, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9570552147239264, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9787234042553192, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.09148573875427246, + "eval_overall_accuracy": 0.9901234567901235, + "eval_overall_f1": 0.955, + "eval_overall_precision": 0.9573934837092731, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6261, + "eval_samples_per_second": 271.505, + "eval_steps_per_second": 4.791, + "step": 4320 + }, + { + "epoch": 46.0, + "grad_norm": 0.0008258108282461762, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.0009, + "step": 4416 + }, + { + "epoch": 46.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9483282674772037, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9570552147239264, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.07752946019172668, + "eval_overall_accuracy": 0.9914951989026063, + "eval_overall_f1": 0.9600997506234414, + "eval_overall_precision": 0.9600997506234414, + "eval_overall_recall": 0.9600997506234414, + "eval_runtime": 0.6241, + "eval_samples_per_second": 272.385, + "eval_steps_per_second": 4.807, + "step": 4416 + }, + { + "epoch": 47.0, + "grad_norm": 0.0041161938570439816, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0016, + "step": 4512 + }, + { + "epoch": 47.0, + "eval_LOCATION_f1": 0.946808510638298, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9368421052631579, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9604863221884498, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9693251533742331, + "eval_ORGANIZATION_recall": 0.9518072289156626, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08538774400949478, + "eval_overall_accuracy": 0.9912208504801098, + "eval_overall_f1": 0.965, + "eval_overall_precision": 0.9674185463659147, + "eval_overall_recall": 0.9625935162094763, + "eval_runtime": 0.6148, + "eval_samples_per_second": 276.504, + "eval_steps_per_second": 4.879, + "step": 4512 + }, + { + "epoch": 48.0, + "grad_norm": 0.0011704850476235151, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.001, + "step": 4608 + }, + { + "epoch": 48.0, + "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9175257731958762, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9573170731707317, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9691358024691358, + "eval_ORGANIZATION_recall": 0.9457831325301205, + "eval_PERSON_f1": 0.9750889679715302, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9856115107913669, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.08850877732038498, + "eval_overall_accuracy": 0.9906721536351166, + "eval_overall_f1": 0.9586983729662076, + "eval_overall_precision": 0.9623115577889447, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6274, + "eval_samples_per_second": 270.979, + "eval_steps_per_second": 4.782, + "step": 4608 + }, + { + "epoch": 49.0, + "grad_norm": 0.0036570949014276266, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.0021, + "step": 4704 + }, + { + "epoch": 49.0, + "eval_LOCATION_f1": 0.9479166666666667, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9191919191919192, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9390243902439024, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9506172839506173, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.07643292844295502, + "eval_overall_accuracy": 0.9906721536351166, + "eval_overall_f1": 0.9564134495641344, + "eval_overall_precision": 0.9552238805970149, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6334, + "eval_samples_per_second": 268.376, + "eval_steps_per_second": 4.736, + "step": 4704 + }, + { + "epoch": 50.0, + "grad_norm": 0.0008804846438579261, + "learning_rate": 2.5e-05, + "loss": 0.0015, + "step": 4800 + }, + { + "epoch": 50.0, + "eval_LOCATION_f1": 0.9574468085106383, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9473684210526315, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9480122324159022, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9627329192546584, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08237197250127792, + "eval_overall_accuracy": 0.9906721536351166, + "eval_overall_f1": 0.9612015018773465, + "eval_overall_precision": 0.964824120603015, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6211, + "eval_samples_per_second": 273.711, + "eval_steps_per_second": 4.83, + "step": 4800 + }, + { + "epoch": 51.0, + "grad_norm": 0.0002897994127124548, + "learning_rate": 2.45e-05, + "loss": 0.0013, + "step": 4896 + }, + { + "epoch": 51.0, + "eval_LOCATION_f1": 0.9374999999999999, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9090909090909091, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9361702127659575, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9447852760736196, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9787234042553192, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.09424282610416412, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.9514321295143213, + "eval_overall_precision": 0.9502487562189055, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6204, + "eval_samples_per_second": 274.019, + "eval_steps_per_second": 4.836, + "step": 4896 + }, + { + "epoch": 52.0, + "grad_norm": 0.0019835070706903934, + "learning_rate": 2.4e-05, + "loss": 0.0006, + "step": 4992 + }, + { + "epoch": 52.0, + "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9183673469387755, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9451219512195123, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9787234042553192, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.08988349884748459, + "eval_overall_accuracy": 0.9901234567901235, + "eval_overall_f1": 0.9563046192259677, + "eval_overall_precision": 0.9575, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6254, + "eval_samples_per_second": 271.807, + "eval_steps_per_second": 4.797, + "step": 4992 + }, + { + "epoch": 53.0, + "grad_norm": 0.0007001986959949136, + "learning_rate": 2.35e-05, + "loss": 0.0022, + "step": 5088 + }, + { + "epoch": 53.0, + "eval_LOCATION_f1": 0.9312169312169313, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9166666666666666, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9425981873111783, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9454545454545454, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9787234042553192, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.08717399090528488, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9526184538653366, + "eval_overall_precision": 0.9526184538653366, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6209, + "eval_samples_per_second": 273.784, + "eval_steps_per_second": 4.831, + "step": 5088 + }, + { + "epoch": 54.0, + "grad_norm": 0.0007513869786635041, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.0012, + "step": 5184 + }, + { + "epoch": 54.0, + "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9270833333333334, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9390243902439024, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9506172839506173, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08728013932704926, + "eval_overall_accuracy": 0.9895747599451303, + "eval_overall_f1": 0.955, + "eval_overall_precision": 0.9573934837092731, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6289, + "eval_samples_per_second": 270.316, + "eval_steps_per_second": 4.77, + "step": 5184 + }, + { + "epoch": 55.0, + "grad_norm": 0.002391280373558402, + "learning_rate": 2.25e-05, + "loss": 0.0006, + "step": 5280 + }, + { + "epoch": 55.0, + "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9175257731958762, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9329268292682927, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9679715302491103, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9784172661870504, + "eval_PERSON_recall": 0.9577464788732394, + "eval_loss": 0.09953483939170837, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.9461827284105131, + "eval_overall_precision": 0.949748743718593, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.6223, + "eval_samples_per_second": 273.189, + "eval_steps_per_second": 4.821, + "step": 5280 + }, + { + "epoch": 56.0, + "grad_norm": 0.0005434587365016341, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0002, + "step": 5376 + }, + { + "epoch": 56.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9382716049382716, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9754385964912281, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.0964677706360817, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9489414694894147, + "eval_overall_precision": 0.9477611940298507, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6222, + "eval_samples_per_second": 273.204, + "eval_steps_per_second": 4.821, + "step": 5376 + }, + { + "epoch": 57.0, + "grad_norm": 4.469410419464111, + "learning_rate": 2.15e-05, + "loss": 0.0005, + "step": 5472 + }, + { + "epoch": 57.0, + "eval_LOCATION_f1": 0.9319371727748691, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9081632653061225, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9345794392523364, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.967741935483871, + "eval_ORGANIZATION_recall": 0.9036144578313253, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.10861176997423172, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9484276729559747, + "eval_overall_precision": 0.9568527918781726, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.6799, + "eval_samples_per_second": 250.049, + "eval_steps_per_second": 4.413, + "step": 5472 + }, + { + "epoch": 58.0, + "grad_norm": 0.00019852245168294758, + "learning_rate": 2.1e-05, + "loss": 0.0003, + "step": 5568 + }, + { + "epoch": 58.0, + "eval_LOCATION_f1": 0.9574468085106383, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9473684210526315, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9329268292682927, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.10073772817850113, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.955, + "eval_overall_precision": 0.9573934837092731, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6284, + "eval_samples_per_second": 270.531, + "eval_steps_per_second": 4.774, + "step": 5568 + }, + { + "epoch": 59.0, + "grad_norm": 0.0002313832810614258, + "learning_rate": 2.05e-05, + "loss": 0.0002, + "step": 5664 + }, + { + "epoch": 59.0, + "eval_LOCATION_f1": 0.9574468085106383, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9473684210526315, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9333333333333333, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9390243902439024, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9754385964912281, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.09882505238056183, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.953922789539228, + "eval_overall_precision": 0.9527363184079602, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6315, + "eval_samples_per_second": 269.214, + "eval_steps_per_second": 4.751, + "step": 5664 + }, + { + "epoch": 60.0, + "grad_norm": 0.0024265151005238295, + "learning_rate": 2e-05, + "loss": 0.0005, + "step": 5760 + }, + { + "epoch": 60.0, + "eval_LOCATION_f1": 0.9411764705882354, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9361702127659575, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9418960244648319, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9565217391304348, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9754385964912281, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.09421272575855255, + "eval_overall_accuracy": 0.99039780521262, + "eval_overall_f1": 0.9536921151439299, + "eval_overall_precision": 0.957286432160804, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6035, + "eval_samples_per_second": 281.667, + "eval_steps_per_second": 4.971, + "step": 5760 + }, + { + "epoch": 61.0, + "grad_norm": 0.003336414461955428, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.0014, + "step": 5856 + }, + { + "epoch": 61.0, + "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9175257731958762, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9263803680981595, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.94375, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.971830985915493, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11807473003864288, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9450000000000001, + "eval_overall_precision": 0.9473684210526315, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.6134, + "eval_samples_per_second": 277.153, + "eval_steps_per_second": 4.891, + "step": 5856 + }, + { + "epoch": 62.0, + "grad_norm": 0.0005696592852473259, + "learning_rate": 1.9e-05, + "loss": 0.0009, + "step": 5952 + }, + { + "epoch": 62.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9361702127659575, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9447852760736196, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.09385991096496582, + "eval_overall_accuracy": 0.99039780521262, + "eval_overall_f1": 0.9551122194513716, + "eval_overall_precision": 0.9551122194513716, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.636, + "eval_samples_per_second": 267.307, + "eval_steps_per_second": 4.717, + "step": 5952 + }, + { + "epoch": 63.0, + "grad_norm": 0.0002308748516952619, + "learning_rate": 1.85e-05, + "loss": 0.0003, + "step": 6048 + }, + { + "epoch": 63.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9425981873111783, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9454545454545454, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08594703674316406, + "eval_overall_accuracy": 0.9909465020576131, + "eval_overall_f1": 0.9589041095890413, + "eval_overall_precision": 0.9577114427860697, + "eval_overall_recall": 0.9600997506234414, + "eval_runtime": 0.631, + "eval_samples_per_second": 269.405, + "eval_steps_per_second": 4.754, + "step": 6048 + }, + { + "epoch": 64.0, + "grad_norm": 0.00040351462666876614, + "learning_rate": 1.8e-05, + "loss": 0.0008, + "step": 6144 + }, + { + "epoch": 64.0, + "eval_LOCATION_f1": 0.962962962962963, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9479166666666666, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9361702127659575, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9447852760736196, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.972027972027972, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9652777777777778, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.09418027848005295, + "eval_overall_accuracy": 0.99039780521262, + "eval_overall_f1": 0.9552238805970149, + "eval_overall_precision": 0.9528535980148883, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6114, + "eval_samples_per_second": 278.047, + "eval_steps_per_second": 4.907, + "step": 6144 + }, + { + "epoch": 65.0, + "grad_norm": 0.0014753889990970492, + "learning_rate": 1.75e-05, + "loss": 0.0005, + "step": 6240 + }, + { + "epoch": 65.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9300911854103343, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9386503067484663, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9754385964912281, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.09385820478200912, + "eval_overall_accuracy": 0.9895747599451303, + "eval_overall_f1": 0.9502487562189055, + "eval_overall_precision": 0.9478908188585607, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6851, + "eval_samples_per_second": 248.156, + "eval_steps_per_second": 4.379, + "step": 6240 + }, + { + "epoch": 66.0, + "grad_norm": 0.001284662983380258, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.0002, + "step": 6336 + }, + { + "epoch": 66.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9300911854103343, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9386503067484663, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9754385964912281, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.09491738677024841, + "eval_overall_accuracy": 0.9895747599451303, + "eval_overall_f1": 0.9502487562189055, + "eval_overall_precision": 0.9478908188585607, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.623, + "eval_samples_per_second": 272.873, + "eval_steps_per_second": 4.815, + "step": 6336 + }, + { + "epoch": 67.0, + "grad_norm": 0.0013459660112857819, + "learning_rate": 1.65e-05, + "loss": 0.0003, + "step": 6432 + }, + { + "epoch": 67.0, + "eval_LOCATION_f1": 0.9574468085106383, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9473684210526315, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9305135951661632, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9333333333333333, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9754385964912281, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.08824346959590912, + "eval_overall_accuracy": 0.99039780521262, + "eval_overall_f1": 0.9527363184079602, + "eval_overall_precision": 0.9503722084367245, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6008, + "eval_samples_per_second": 282.936, + "eval_steps_per_second": 4.993, + "step": 6432 + }, + { + "epoch": 68.0, + "grad_norm": 0.002200409071519971, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0013, + "step": 6528 + }, + { + "epoch": 68.0, + "eval_LOCATION_f1": 0.9528795811518325, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9285714285714286, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9329268292682927, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.09217658638954163, + "eval_overall_accuracy": 0.9901234567901235, + "eval_overall_f1": 0.953922789539228, + "eval_overall_precision": 0.9527363184079602, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6431, + "eval_samples_per_second": 264.331, + "eval_steps_per_second": 4.665, + "step": 6528 + }, + { + "epoch": 69.0, + "grad_norm": 0.00040152997826226056, + "learning_rate": 1.55e-05, + "loss": 0.0004, + "step": 6624 + }, + { + "epoch": 69.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9425981873111783, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9454545454545454, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.09245934337377548, + "eval_overall_accuracy": 0.99039780521262, + "eval_overall_f1": 0.9589041095890413, + "eval_overall_precision": 0.9577114427860697, + "eval_overall_recall": 0.9600997506234414, + "eval_runtime": 0.6295, + "eval_samples_per_second": 270.059, + "eval_steps_per_second": 4.766, + "step": 6624 + }, + { + "epoch": 70.0, + "grad_norm": 0.00021879332780372351, + "learning_rate": 1.5e-05, + "loss": 0.0014, + "step": 6720 + }, + { + "epoch": 70.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9397590361445783, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9397590361445783, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.09780900925397873, + "eval_overall_accuracy": 0.9898491083676269, + "eval_overall_f1": 0.9590062111801242, + "eval_overall_precision": 0.9554455445544554, + "eval_overall_recall": 0.9625935162094763, + "eval_runtime": 0.6084, + "eval_samples_per_second": 279.416, + "eval_steps_per_second": 4.931, + "step": 6720 + }, + { + "epoch": 71.0, + "grad_norm": 0.12344560027122498, + "learning_rate": 1.45e-05, + "loss": 0.0003, + "step": 6816 + }, + { + "epoch": 71.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.927710843373494, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.927710843373494, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.09870771318674088, + "eval_overall_accuracy": 0.9887517146776406, + "eval_overall_f1": 0.9502487562189055, + "eval_overall_precision": 0.9478908188585607, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6127, + "eval_samples_per_second": 277.443, + "eval_steps_per_second": 4.896, + "step": 6816 + }, + { + "epoch": 72.0, + "grad_norm": 0.0003804394800681621, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0002, + "step": 6912 + }, + { + "epoch": 72.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9361702127659575, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9447852760736196, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.971830985915493, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.10656601190567017, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.953922789539228, + "eval_overall_precision": 0.9527363184079602, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6259, + "eval_samples_per_second": 271.591, + "eval_steps_per_second": 4.793, + "step": 6912 + }, + { + "epoch": 73.0, + "grad_norm": 0.0002077179087791592, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0002, + "step": 7008 + }, + { + "epoch": 73.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9333333333333333, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9390243902439024, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.971830985915493, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.10909091681241989, + "eval_overall_accuracy": 0.9887517146776406, + "eval_overall_f1": 0.9527363184079602, + "eval_overall_precision": 0.9503722084367245, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6237, + "eval_samples_per_second": 272.587, + "eval_steps_per_second": 4.81, + "step": 7008 + }, + { + "epoch": 74.0, + "grad_norm": 0.00039670878322795033, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.0002, + "step": 7104 + }, + { + "epoch": 74.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9240121580547112, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9325153374233128, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11193250864744186, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9476309226932669, + "eval_overall_precision": 0.9476309226932669, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.7525, + "eval_samples_per_second": 225.919, + "eval_steps_per_second": 3.987, + "step": 7104 + }, + { + "epoch": 75.0, + "grad_norm": 0.0014596916735172272, + "learning_rate": 1.25e-05, + "loss": 0.0003, + "step": 7200 + }, + { + "epoch": 75.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9244712990936556, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9272727272727272, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.10835234820842743, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9515527950310558, + "eval_overall_precision": 0.948019801980198, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.636, + "eval_samples_per_second": 267.286, + "eval_steps_per_second": 4.717, + "step": 7200 + }, + { + "epoch": 76.0, + "grad_norm": 0.0014675238635390997, + "learning_rate": 1.2e-05, + "loss": 0.0011, + "step": 7296 + }, + { + "epoch": 76.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9333333333333333, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9390243902439024, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.10865047574043274, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.953922789539228, + "eval_overall_precision": 0.9527363184079602, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6288, + "eval_samples_per_second": 270.367, + "eval_steps_per_second": 4.771, + "step": 7296 + }, + { + "epoch": 77.0, + "grad_norm": 0.00019736203830689192, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.0002, + "step": 7392 + }, + { + "epoch": 77.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.10928314179182053, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9576059850374065, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6019, + "eval_samples_per_second": 282.445, + "eval_steps_per_second": 4.984, + "step": 7392 + }, + { + "epoch": 78.0, + "grad_norm": 0.00022058725880924612, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0002, + "step": 7488 + }, + { + "epoch": 78.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.10950414836406708, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9576059850374065, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6337, + "eval_samples_per_second": 268.27, + "eval_steps_per_second": 4.734, + "step": 7488 + }, + { + "epoch": 79.0, + "grad_norm": 0.00011872066534124315, + "learning_rate": 1.05e-05, + "loss": 0.0004, + "step": 7584 + }, + { + "epoch": 79.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9333333333333333, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9390243902439024, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9788732394366197, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.10740546882152557, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9552238805970149, + "eval_overall_precision": 0.9528535980148883, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6231, + "eval_samples_per_second": 272.826, + "eval_steps_per_second": 4.815, + "step": 7584 + }, + { + "epoch": 80.0, + "grad_norm": 0.0006631407304666936, + "learning_rate": 1e-05, + "loss": 0.0003, + "step": 7680 + }, + { + "epoch": 80.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9329268292682927, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11605256050825119, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.951310861423221, + "eval_overall_precision": 0.9525, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.6131, + "eval_samples_per_second": 277.277, + "eval_steps_per_second": 4.893, + "step": 7680 + }, + { + "epoch": 81.0, + "grad_norm": 0.1374320536851883, + "learning_rate": 9.5e-06, + "loss": 0.0002, + "step": 7776 + }, + { + "epoch": 81.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9361702127659575, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9447852760736196, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.1175515204668045, + "eval_overall_accuracy": 0.9887517146776406, + "eval_overall_f1": 0.9526184538653366, + "eval_overall_precision": 0.9526184538653366, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6375, + "eval_samples_per_second": 266.669, + "eval_steps_per_second": 4.706, + "step": 7776 + }, + { + "epoch": 82.0, + "grad_norm": 1.5108898878097534, + "learning_rate": 9e-06, + "loss": 0.0003, + "step": 7872 + }, + { + "epoch": 82.0, + "eval_LOCATION_f1": 0.9523809523809523, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9375, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11446201056241989, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9563046192259677, + "eval_overall_precision": 0.9575, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.6114, + "eval_samples_per_second": 278.061, + "eval_steps_per_second": 4.907, + "step": 7872 + }, + { + "epoch": 83.0, + "grad_norm": 0.00013873293937649578, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0002, + "step": 7968 + }, + { + "epoch": 83.0, + "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9270833333333334, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9492537313432836, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9408284023668639, + "eval_ORGANIZATION_recall": 0.9578313253012049, + "eval_PERSON_f1": 0.9681978798586572, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.10357999056577682, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9541511771995044, + "eval_overall_precision": 0.9482758620689655, + "eval_overall_recall": 0.9600997506234414, + "eval_runtime": 0.615, + "eval_samples_per_second": 276.423, + "eval_steps_per_second": 4.878, + "step": 7968 + }, + { + "epoch": 84.0, + "grad_norm": 0.0008929019095376134, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0004, + "step": 8064 + }, + { + "epoch": 84.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.9489489489489489, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9461077844311377, + "eval_ORGANIZATION_recall": 0.9518072289156626, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.1038927435874939, + "eval_overall_accuracy": 0.9895747599451303, + "eval_overall_f1": 0.9578163771712158, + "eval_overall_precision": 0.9530864197530864, + "eval_overall_recall": 0.9625935162094763, + "eval_runtime": 0.6194, + "eval_samples_per_second": 274.479, + "eval_steps_per_second": 4.844, + "step": 8064 + }, + { + "epoch": 85.0, + "grad_norm": 0.000778336834628135, + "learning_rate": 7.5e-06, + "loss": 0.0008, + "step": 8160 + }, + { + "epoch": 85.0, + "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.942942942942943, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9401197604790419, + "eval_ORGANIZATION_recall": 0.9457831325301205, + "eval_PERSON_f1": 0.9681978798586572, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.10426676273345947, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9528535980148883, + "eval_overall_precision": 0.9481481481481482, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6185, + "eval_samples_per_second": 274.847, + "eval_steps_per_second": 4.85, + "step": 8160 + }, + { + "epoch": 86.0, + "grad_norm": 0.00020486340508796275, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0003, + "step": 8256 + }, + { + "epoch": 86.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9329268292682927, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9444444444444444, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11413931101560593, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.9538077403245941, + "eval_overall_precision": 0.955, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.6171, + "eval_samples_per_second": 275.492, + "eval_steps_per_second": 4.862, + "step": 8256 + }, + { + "epoch": 87.0, + "grad_norm": 0.00035874126479029655, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0002, + "step": 8352 + }, + { + "epoch": 87.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9451219512195123, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11689982563257217, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9588014981273408, + "eval_overall_precision": 0.96, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6253, + "eval_samples_per_second": 271.889, + "eval_steps_per_second": 4.798, + "step": 8352 + }, + { + "epoch": 88.0, + "grad_norm": 0.00014164223102852702, + "learning_rate": 6e-06, + "loss": 0.0002, + "step": 8448 + }, + { + "epoch": 88.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9451219512195123, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11667127907276154, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9588014981273408, + "eval_overall_precision": 0.96, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6215, + "eval_samples_per_second": 273.552, + "eval_steps_per_second": 4.827, + "step": 8448 + }, + { + "epoch": 89.0, + "grad_norm": 0.00030703318770974874, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0002, + "step": 8544 + }, + { + "epoch": 89.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9451219512195123, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11650407314300537, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9588014981273408, + "eval_overall_precision": 0.96, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6113, + "eval_samples_per_second": 278.103, + "eval_steps_per_second": 4.908, + "step": 8544 + }, + { + "epoch": 90.0, + "grad_norm": 0.0004165687714703381, + "learning_rate": 5e-06, + "loss": 0.0002, + "step": 8640 + }, + { + "epoch": 90.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9451219512195123, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11252254992723465, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9588014981273408, + "eval_overall_precision": 0.96, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6218, + "eval_samples_per_second": 273.408, + "eval_steps_per_second": 4.825, + "step": 8640 + }, + { + "epoch": 91.0, + "grad_norm": 0.000288874696707353, + "learning_rate": 4.5e-06, + "loss": 0.0001, + "step": 8736 + }, + { + "epoch": 91.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9451219512195123, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11149457842111588, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9588014981273408, + "eval_overall_precision": 0.96, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.7004, + "eval_samples_per_second": 242.73, + "eval_steps_per_second": 4.283, + "step": 8736 + }, + { + "epoch": 92.0, + "grad_norm": 0.0001853927387855947, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0001, + "step": 8832 + }, + { + "epoch": 92.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9451219512195123, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.1115955114364624, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9588014981273408, + "eval_overall_precision": 0.96, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6188, + "eval_samples_per_second": 274.74, + "eval_steps_per_second": 4.848, + "step": 8832 + }, + { + "epoch": 93.0, + "grad_norm": 0.00011335952149238437, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0001, + "step": 8928 + }, + { + "epoch": 93.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9451219512195123, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11149124056100845, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9588014981273408, + "eval_overall_precision": 0.96, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6327, + "eval_samples_per_second": 268.687, + "eval_steps_per_second": 4.742, + "step": 8928 + }, + { + "epoch": 94.0, + "grad_norm": 0.00019545151735655963, + "learning_rate": 3e-06, + "loss": 0.0002, + "step": 9024 + }, + { + "epoch": 94.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9451219512195123, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.9567901234567902, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11211737245321274, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9588014981273408, + "eval_overall_precision": 0.96, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6287, + "eval_samples_per_second": 270.382, + "eval_steps_per_second": 4.771, + "step": 9024 + }, + { + "epoch": 95.0, + "grad_norm": 0.00020475259225349873, + "learning_rate": 2.5e-06, + "loss": 0.0002, + "step": 9120 + }, + { + "epoch": 95.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.1112418919801712, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9576059850374065, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6086, + "eval_samples_per_second": 279.32, + "eval_steps_per_second": 4.929, + "step": 9120 + }, + { + "epoch": 96.0, + "grad_norm": 0.00014345929957926273, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0002, + "step": 9216 + }, + { + "epoch": 96.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11125125735998154, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9576059850374065, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6107, + "eval_samples_per_second": 278.366, + "eval_steps_per_second": 4.912, + "step": 9216 + }, + { + "epoch": 97.0, + "grad_norm": 8.01074638729915e-05, + "learning_rate": 1.5e-06, + "loss": 0.0001, + "step": 9312 + }, + { + "epoch": 97.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11116538196802139, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9576059850374065, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6194, + "eval_samples_per_second": 274.473, + "eval_steps_per_second": 4.844, + "step": 9312 + }, + { + "epoch": 98.0, + "grad_norm": 0.00012084632908226922, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0002, + "step": 9408 + }, + { + "epoch": 98.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11128947138786316, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9576059850374065, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6113, + "eval_samples_per_second": 278.09, + "eval_steps_per_second": 4.907, + "step": 9408 + }, + { + "epoch": 99.0, + "grad_norm": 0.00010305291652912274, + "learning_rate": 5.000000000000001e-07, + "loss": 0.0002, + "step": 9504 + }, + { + "epoch": 99.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.11142369359731674, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9576059850374065, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6123, + "eval_samples_per_second": 277.646, + "eval_steps_per_second": 4.9, + "step": 9504 + }, + { + "epoch": 100.0, + "grad_norm": 0.000133631780045107, + "learning_rate": 0.0, + "loss": 0.0001, + "step": 9600 + }, + { + "epoch": 100.0, + "eval_LOCATION_f1": 0.9578947368421052, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.9381443298969072, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.9422492401215805, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.950920245398773, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.1114446297287941, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9576059850374065, + "eval_overall_precision": 0.9576059850374065, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.6334, + "eval_samples_per_second": 268.413, + "eval_steps_per_second": 4.737, + "step": 9600 + }, + { + "epoch": 100.0, + "step": 9600, + "total_flos": 3771733492877262.0, + "train_loss": 0.005196920169595008, + "train_runtime": 2272.5888, + "train_samples_per_second": 67.368, + "train_steps_per_second": 4.224 + } + ], + "logging_steps": 500, + "max_steps": 9600, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 3771733492877262.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}