nerui-pt-pl20-1 / trainer_state.json
apwic's picture
End of training
3ac1b63 verified
raw
history blame
110 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"eval_steps": 500,
"global_step": 9600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.0963119268417358,
"learning_rate": 4.9500000000000004e-05,
"loss": 0.8462,
"step": 96
},
{
"epoch": 1.0,
"eval_LOCATION_f1": 0.0,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.0,
"eval_LOCATION_recall": 0.0,
"eval_ORGANIZATION_f1": 0.2613333333333333,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.22580645161290322,
"eval_ORGANIZATION_recall": 0.310126582278481,
"eval_PERSON_f1": 0.2878228782287823,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.2653061224489796,
"eval_PERSON_recall": 0.31451612903225806,
"eval_loss": 0.39636561274528503,
"eval_overall_accuracy": 0.8663189678836124,
"eval_overall_f1": 0.2300653594771242,
"eval_overall_precision": 0.23978201634877383,
"eval_overall_recall": 0.22110552763819097,
"eval_runtime": 0.6192,
"eval_samples_per_second": 274.541,
"eval_steps_per_second": 4.845,
"step": 96
},
{
"epoch": 2.0,
"grad_norm": 1.0906190872192383,
"learning_rate": 4.9e-05,
"loss": 0.3755,
"step": 192
},
{
"epoch": 2.0,
"eval_LOCATION_f1": 0.411764705882353,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.4016393442622951,
"eval_LOCATION_recall": 0.4224137931034483,
"eval_ORGANIZATION_f1": 0.5478547854785478,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.5724137931034483,
"eval_ORGANIZATION_recall": 0.5253164556962026,
"eval_PERSON_f1": 0.5597269624573379,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.48520710059171596,
"eval_PERSON_recall": 0.6612903225806451,
"eval_loss": 0.24201658368110657,
"eval_overall_accuracy": 0.925061762283832,
"eval_overall_f1": 0.513189448441247,
"eval_overall_precision": 0.4908256880733945,
"eval_overall_recall": 0.5376884422110553,
"eval_runtime": 0.6495,
"eval_samples_per_second": 261.724,
"eval_steps_per_second": 4.619,
"step": 192
},
{
"epoch": 3.0,
"grad_norm": 0.8066442608833313,
"learning_rate": 4.85e-05,
"loss": 0.2104,
"step": 288
},
{
"epoch": 3.0,
"eval_LOCATION_f1": 0.8296943231441049,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.8407079646017699,
"eval_LOCATION_recall": 0.8189655172413793,
"eval_ORGANIZATION_f1": 0.7823529411764705,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.7307692307692307,
"eval_ORGANIZATION_recall": 0.8417721518987342,
"eval_PERSON_f1": 0.9571984435797665,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.924812030075188,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.09525596350431442,
"eval_overall_accuracy": 0.9717265989569036,
"eval_overall_f1": 0.8498789346246972,
"eval_overall_precision": 0.8200934579439252,
"eval_overall_recall": 0.8819095477386935,
"eval_runtime": 0.6767,
"eval_samples_per_second": 251.223,
"eval_steps_per_second": 4.433,
"step": 288
},
{
"epoch": 4.0,
"grad_norm": 0.9108183979988098,
"learning_rate": 4.8e-05,
"loss": 0.1323,
"step": 384
},
{
"epoch": 4.0,
"eval_LOCATION_f1": 0.8286852589641435,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.7703703703703704,
"eval_LOCATION_recall": 0.896551724137931,
"eval_ORGANIZATION_f1": 0.8141592920353983,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.7624309392265194,
"eval_ORGANIZATION_recall": 0.8734177215189873,
"eval_PERSON_f1": 0.9685039370078741,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9461538461538461,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07598108053207397,
"eval_overall_accuracy": 0.9744715893494372,
"eval_overall_f1": 0.8649289099526066,
"eval_overall_precision": 0.8183856502242153,
"eval_overall_recall": 0.9170854271356784,
"eval_runtime": 0.6792,
"eval_samples_per_second": 250.289,
"eval_steps_per_second": 4.417,
"step": 384
},
{
"epoch": 5.0,
"grad_norm": 0.4616452753543854,
"learning_rate": 4.75e-05,
"loss": 0.1138,
"step": 480
},
{
"epoch": 5.0,
"eval_LOCATION_f1": 0.8571428571428572,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.8888888888888888,
"eval_LOCATION_recall": 0.8275862068965517,
"eval_ORGANIZATION_f1": 0.901840490797546,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.875,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9800796812749005,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.055858857929706573,
"eval_overall_accuracy": 0.9829810595662916,
"eval_overall_f1": 0.9138576779026217,
"eval_overall_precision": 0.9081885856079405,
"eval_overall_recall": 0.9195979899497487,
"eval_runtime": 0.6466,
"eval_samples_per_second": 262.924,
"eval_steps_per_second": 4.64,
"step": 480
},
{
"epoch": 6.0,
"grad_norm": 0.7623656988143921,
"learning_rate": 4.7e-05,
"loss": 0.0958,
"step": 576
},
{
"epoch": 6.0,
"eval_LOCATION_f1": 0.8571428571428572,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.813953488372093,
"eval_LOCATION_recall": 0.9051724137931034,
"eval_ORGANIZATION_f1": 0.8896103896103896,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9133333333333333,
"eval_ORGANIZATION_recall": 0.8670886075949367,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04952048137784004,
"eval_overall_accuracy": 0.9846280538018117,
"eval_overall_f1": 0.9102244389027431,
"eval_overall_precision": 0.9034653465346535,
"eval_overall_recall": 0.9170854271356784,
"eval_runtime": 0.6346,
"eval_samples_per_second": 267.871,
"eval_steps_per_second": 4.727,
"step": 576
},
{
"epoch": 7.0,
"grad_norm": 1.0199775695800781,
"learning_rate": 4.6500000000000005e-05,
"loss": 0.0842,
"step": 672
},
{
"epoch": 7.0,
"eval_LOCATION_f1": 0.8928571428571429,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9259259259259259,
"eval_LOCATION_recall": 0.8620689655172413,
"eval_ORGANIZATION_f1": 0.8942598187311178,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.8554913294797688,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04352428764104843,
"eval_overall_accuracy": 0.9860005489980785,
"eval_overall_f1": 0.9228855721393034,
"eval_overall_precision": 0.9137931034482759,
"eval_overall_recall": 0.9321608040201005,
"eval_runtime": 0.6425,
"eval_samples_per_second": 264.584,
"eval_steps_per_second": 4.669,
"step": 672
},
{
"epoch": 8.0,
"grad_norm": 0.3342026472091675,
"learning_rate": 4.600000000000001e-05,
"loss": 0.0741,
"step": 768
},
{
"epoch": 8.0,
"eval_LOCATION_f1": 0.9045643153526971,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.872,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9096774193548387,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9276315789473685,
"eval_ORGANIZATION_recall": 0.8924050632911392,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.041092827916145325,
"eval_overall_accuracy": 0.987098545155092,
"eval_overall_f1": 0.9325,
"eval_overall_precision": 0.927860696517413,
"eval_overall_recall": 0.9371859296482412,
"eval_runtime": 0.644,
"eval_samples_per_second": 263.989,
"eval_steps_per_second": 4.659,
"step": 768
},
{
"epoch": 9.0,
"grad_norm": 0.3780902922153473,
"learning_rate": 4.55e-05,
"loss": 0.0704,
"step": 864
},
{
"epoch": 9.0,
"eval_LOCATION_f1": 0.9451476793248945,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9256198347107438,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9345794392523364,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9202453987730062,
"eval_ORGANIZATION_recall": 0.9493670886075949,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.037949126213788986,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9529702970297028,
"eval_overall_precision": 0.9390243902439024,
"eval_overall_recall": 0.9673366834170855,
"eval_runtime": 0.6421,
"eval_samples_per_second": 264.766,
"eval_steps_per_second": 4.672,
"step": 864
},
{
"epoch": 10.0,
"grad_norm": 0.5685635805130005,
"learning_rate": 4.5e-05,
"loss": 0.063,
"step": 960
},
{
"epoch": 10.0,
"eval_LOCATION_f1": 0.9068825910931174,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.8549618320610687,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9320388349514563,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9536423841059603,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04031159356236458,
"eval_overall_accuracy": 0.9865495470765853,
"eval_overall_f1": 0.9416149068322981,
"eval_overall_precision": 0.9312039312039312,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6408,
"eval_samples_per_second": 265.303,
"eval_steps_per_second": 4.682,
"step": 960
},
{
"epoch": 11.0,
"grad_norm": 0.2671253979206085,
"learning_rate": 4.4500000000000004e-05,
"loss": 0.0579,
"step": 1056
},
{
"epoch": 11.0,
"eval_LOCATION_f1": 0.9237288135593221,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9083333333333333,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9216300940438872,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9130434782608695,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9800796812749005,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.0378592312335968,
"eval_overall_accuracy": 0.9884710403513588,
"eval_overall_f1": 0.9404466501240695,
"eval_overall_precision": 0.928921568627451,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6399,
"eval_samples_per_second": 265.658,
"eval_steps_per_second": 4.688,
"step": 1056
},
{
"epoch": 12.0,
"grad_norm": 0.42393070459365845,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.053,
"step": 1152
},
{
"epoch": 12.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9579288025889968,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9801324503311258,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.035319238901138306,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9584905660377357,
"eval_overall_precision": 0.9596977329974811,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.6433,
"eval_samples_per_second": 264.248,
"eval_steps_per_second": 4.663,
"step": 1152
},
{
"epoch": 13.0,
"grad_norm": 0.5318233370780945,
"learning_rate": 4.35e-05,
"loss": 0.0512,
"step": 1248
},
{
"epoch": 13.0,
"eval_LOCATION_f1": 0.9198312236286921,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9008264462809917,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.935064935064935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.96,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03649090975522995,
"eval_overall_accuracy": 0.9890200384298655,
"eval_overall_f1": 0.947103274559194,
"eval_overall_precision": 0.9494949494949495,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.7031,
"eval_samples_per_second": 241.776,
"eval_steps_per_second": 4.267,
"step": 1248
},
{
"epoch": 14.0,
"grad_norm": 0.34443172812461853,
"learning_rate": 4.3e-05,
"loss": 0.0478,
"step": 1344
},
{
"epoch": 14.0,
"eval_LOCATION_f1": 0.9012875536480686,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.8974358974358975,
"eval_LOCATION_recall": 0.9051724137931034,
"eval_ORGANIZATION_f1": 0.9363057324840764,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9423076923076923,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03145522624254227,
"eval_overall_accuracy": 0.9890200384298655,
"eval_overall_f1": 0.9422110552763819,
"eval_overall_precision": 0.9422110552763819,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.6372,
"eval_samples_per_second": 266.797,
"eval_steps_per_second": 4.708,
"step": 1344
},
{
"epoch": 15.0,
"grad_norm": 0.7487108707427979,
"learning_rate": 4.25e-05,
"loss": 0.0459,
"step": 1440
},
{
"epoch": 15.0,
"eval_LOCATION_f1": 0.9276595744680851,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9159663865546218,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9333333333333332,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9363057324840764,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03564862161874771,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9475,
"eval_overall_precision": 0.9427860696517413,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.632,
"eval_samples_per_second": 268.989,
"eval_steps_per_second": 4.747,
"step": 1440
},
{
"epoch": 16.0,
"grad_norm": 0.5481808185577393,
"learning_rate": 4.2e-05,
"loss": 0.0442,
"step": 1536
},
{
"epoch": 16.0,
"eval_LOCATION_f1": 0.9159663865546218,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.8934426229508197,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.932475884244373,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9477124183006536,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03452915698289871,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9448621553884713,
"eval_overall_precision": 0.9425,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.6411,
"eval_samples_per_second": 265.187,
"eval_steps_per_second": 4.68,
"step": 1536
},
{
"epoch": 17.0,
"grad_norm": 0.7891280055046082,
"learning_rate": 4.15e-05,
"loss": 0.0388,
"step": 1632
},
{
"epoch": 17.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9329073482428115,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9419354838709677,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9800796812749005,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.033535558730363846,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9488139825218478,
"eval_overall_precision": 0.9429280397022333,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6327,
"eval_samples_per_second": 268.684,
"eval_steps_per_second": 4.741,
"step": 1632
},
{
"epoch": 18.0,
"grad_norm": 0.5801985859870911,
"learning_rate": 4.1e-05,
"loss": 0.0394,
"step": 1728
},
{
"epoch": 18.0,
"eval_LOCATION_f1": 0.9487179487179486,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.940677966101695,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03287326544523239,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9584905660377357,
"eval_overall_precision": 0.9596977329974811,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.6421,
"eval_samples_per_second": 264.774,
"eval_steps_per_second": 4.672,
"step": 1728
},
{
"epoch": 19.0,
"grad_norm": 0.6355949640274048,
"learning_rate": 4.05e-05,
"loss": 0.0367,
"step": 1824
},
{
"epoch": 19.0,
"eval_LOCATION_f1": 0.9406779661016949,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.925,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9442622950819672,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9795918367346939,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03376775234937668,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9557522123893806,
"eval_overall_precision": 0.9618320610687023,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.6327,
"eval_samples_per_second": 268.698,
"eval_steps_per_second": 4.742,
"step": 1824
},
{
"epoch": 20.0,
"grad_norm": 0.27867820858955383,
"learning_rate": 4e-05,
"loss": 0.0348,
"step": 1920
},
{
"epoch": 20.0,
"eval_LOCATION_f1": 0.9487179487179486,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.940677966101695,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9171974522292994,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9230769230769231,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03843284398317337,
"eval_overall_accuracy": 0.9887455393906122,
"eval_overall_f1": 0.9473684210526316,
"eval_overall_precision": 0.945,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.6346,
"eval_samples_per_second": 267.864,
"eval_steps_per_second": 4.727,
"step": 1920
},
{
"epoch": 21.0,
"grad_norm": 0.4647029638290405,
"learning_rate": 3.9500000000000005e-05,
"loss": 0.0334,
"step": 2016
},
{
"epoch": 21.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9354838709677419,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9539473684210527,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9761904761904763,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03526832535862923,
"eval_overall_accuracy": 0.9890200384298655,
"eval_overall_f1": 0.9461827284105131,
"eval_overall_precision": 0.942643391521197,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.64,
"eval_samples_per_second": 265.607,
"eval_steps_per_second": 4.687,
"step": 2016
},
{
"epoch": 22.0,
"grad_norm": 0.326353520154953,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.0308,
"step": 2112
},
{
"epoch": 22.0,
"eval_LOCATION_f1": 0.9495798319327732,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9262295081967213,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9407894736842106,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9794520547945206,
"eval_ORGANIZATION_recall": 0.9050632911392406,
"eval_PERSON_f1": 0.9761904761904763,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03510461002588272,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9546599496221662,
"eval_overall_precision": 0.9570707070707071,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6413,
"eval_samples_per_second": 265.101,
"eval_steps_per_second": 4.678,
"step": 2112
},
{
"epoch": 23.0,
"grad_norm": 0.44554173946380615,
"learning_rate": 3.85e-05,
"loss": 0.0303,
"step": 2208
},
{
"epoch": 23.0,
"eval_LOCATION_f1": 0.9451476793248945,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9256198347107438,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.948051948051948,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9733333333333334,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.035424478352069855,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9583858764186632,
"eval_overall_precision": 0.9620253164556962,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6652,
"eval_samples_per_second": 255.567,
"eval_steps_per_second": 4.51,
"step": 2208
},
{
"epoch": 24.0,
"grad_norm": 0.23773515224456787,
"learning_rate": 3.8e-05,
"loss": 0.0282,
"step": 2304
},
{
"epoch": 24.0,
"eval_LOCATION_f1": 0.9356223175965666,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9316239316239316,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9463722397476341,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9433962264150944,
"eval_ORGANIZATION_recall": 0.9493670886075949,
"eval_PERSON_f1": 0.9761904761904763,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03448145464062691,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.9526184538653367,
"eval_overall_precision": 0.9455445544554455,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.6436,
"eval_samples_per_second": 264.155,
"eval_steps_per_second": 4.662,
"step": 2304
},
{
"epoch": 25.0,
"grad_norm": 0.30238980054855347,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.0295,
"step": 2400
},
{
"epoch": 25.0,
"eval_LOCATION_f1": 0.9361702127659575,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9243697478991597,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9483870967741935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9671052631578947,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9761904761904763,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03953889012336731,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9535759096612295,
"eval_overall_precision": 0.9523809523809523,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6552,
"eval_samples_per_second": 259.455,
"eval_steps_per_second": 4.579,
"step": 2400
},
{
"epoch": 26.0,
"grad_norm": 0.1625722199678421,
"learning_rate": 3.7e-05,
"loss": 0.0275,
"step": 2496
},
{
"epoch": 26.0,
"eval_LOCATION_f1": 0.9372384937238494,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9105691056910569,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9508196721311475,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9863945578231292,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9761904761904763,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03993573784828186,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9547738693467337,
"eval_overall_precision": 0.9547738693467337,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6494,
"eval_samples_per_second": 261.797,
"eval_steps_per_second": 4.62,
"step": 2496
},
{
"epoch": 27.0,
"grad_norm": 0.5146996974945068,
"learning_rate": 3.65e-05,
"loss": 0.0255,
"step": 2592
},
{
"epoch": 27.0,
"eval_LOCATION_f1": 0.944206008583691,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9401709401709402,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9201277955271565,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9290322580645162,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9761904761904763,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03630305826663971,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9448621553884713,
"eval_overall_precision": 0.9425,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.631,
"eval_samples_per_second": 269.407,
"eval_steps_per_second": 4.754,
"step": 2592
},
{
"epoch": 28.0,
"grad_norm": 0.5589452385902405,
"learning_rate": 3.6e-05,
"loss": 0.0245,
"step": 2688
},
{
"epoch": 28.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9367088607594937,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9367088607594937,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04390503466129303,
"eval_overall_accuracy": 0.9890200384298655,
"eval_overall_f1": 0.9501246882793017,
"eval_overall_precision": 0.943069306930693,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.6506,
"eval_samples_per_second": 261.309,
"eval_steps_per_second": 4.611,
"step": 2688
},
{
"epoch": 29.0,
"grad_norm": 0.2479996234178543,
"learning_rate": 3.55e-05,
"loss": 0.0233,
"step": 2784
},
{
"epoch": 29.0,
"eval_LOCATION_f1": 0.9294605809128631,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.896,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9576547231270359,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9865771812080537,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.042132169008255005,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9573934837092732,
"eval_overall_precision": 0.955,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.6376,
"eval_samples_per_second": 266.607,
"eval_steps_per_second": 4.705,
"step": 2784
},
{
"epoch": 30.0,
"grad_norm": 0.6440322399139404,
"learning_rate": 3.5e-05,
"loss": 0.023,
"step": 2880
},
{
"epoch": 30.0,
"eval_LOCATION_f1": 0.9137931034482759,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9137931034482759,
"eval_LOCATION_recall": 0.9137931034482759,
"eval_ORGANIZATION_f1": 0.9358974358974359,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.948051948051948,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04351821914315224,
"eval_overall_accuracy": 0.9884710403513588,
"eval_overall_f1": 0.9457755359394704,
"eval_overall_precision": 0.9493670886075949,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.6288,
"eval_samples_per_second": 270.344,
"eval_steps_per_second": 4.771,
"step": 2880
},
{
"epoch": 31.0,
"grad_norm": 1.1185063123703003,
"learning_rate": 3.45e-05,
"loss": 0.0238,
"step": 2976
},
{
"epoch": 31.0,
"eval_LOCATION_f1": 0.9191489361702128,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.907563025210084,
"eval_LOCATION_recall": 0.9310344827586207,
"eval_ORGANIZATION_f1": 0.9415584415584415,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9666666666666667,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03737273067235947,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.948297604035309,
"eval_overall_precision": 0.9518987341772152,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.6598,
"eval_samples_per_second": 257.654,
"eval_steps_per_second": 4.547,
"step": 2976
},
{
"epoch": 32.0,
"grad_norm": 0.36497652530670166,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.0214,
"step": 3072
},
{
"epoch": 32.0,
"eval_LOCATION_f1": 0.9316239316239315,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.923728813559322,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.0383097268640995,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.9583858764186632,
"eval_overall_precision": 0.9620253164556962,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6363,
"eval_samples_per_second": 267.177,
"eval_steps_per_second": 4.715,
"step": 3072
},
{
"epoch": 33.0,
"grad_norm": 0.5497635006904602,
"learning_rate": 3.35e-05,
"loss": 0.021,
"step": 3168
},
{
"epoch": 33.0,
"eval_LOCATION_f1": 0.927038626609442,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9230769230769231,
"eval_LOCATION_recall": 0.9310344827586207,
"eval_ORGANIZATION_f1": 0.9354838709677419,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9539473684210527,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03853040188550949,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9494949494949495,
"eval_overall_precision": 0.9543147208121827,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.64,
"eval_samples_per_second": 265.639,
"eval_steps_per_second": 4.688,
"step": 3168
},
{
"epoch": 34.0,
"grad_norm": 0.08254751563072205,
"learning_rate": 3.3e-05,
"loss": 0.0219,
"step": 3264
},
{
"epoch": 34.0,
"eval_LOCATION_f1": 0.925,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.8951612903225806,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9285714285714286,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9533333333333334,
"eval_ORGANIZATION_recall": 0.9050632911392406,
"eval_PERSON_f1": 0.9761904761904763,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.05139599367976189,
"eval_overall_accuracy": 0.987098545155092,
"eval_overall_f1": 0.9425,
"eval_overall_precision": 0.9378109452736318,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.6431,
"eval_samples_per_second": 264.33,
"eval_steps_per_second": 4.665,
"step": 3264
},
{
"epoch": 35.0,
"grad_norm": 0.5499096512794495,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.0204,
"step": 3360
},
{
"epoch": 35.0,
"eval_LOCATION_f1": 0.9361702127659575,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9243697478991597,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.948051948051948,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9733333333333334,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04065559431910515,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9570707070707072,
"eval_overall_precision": 0.9619289340101523,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6491,
"eval_samples_per_second": 261.916,
"eval_steps_per_second": 4.622,
"step": 3360
},
{
"epoch": 36.0,
"grad_norm": 0.1895403116941452,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0186,
"step": 3456
},
{
"epoch": 36.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04075060784816742,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9559748427672956,
"eval_overall_precision": 0.9571788413098237,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6449,
"eval_samples_per_second": 263.608,
"eval_steps_per_second": 4.652,
"step": 3456
},
{
"epoch": 37.0,
"grad_norm": 0.3846062123775482,
"learning_rate": 3.15e-05,
"loss": 0.0198,
"step": 3552
},
{
"epoch": 37.0,
"eval_LOCATION_f1": 0.9310344827586207,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9310344827586207,
"eval_LOCATION_recall": 0.9310344827586207,
"eval_ORGANIZATION_f1": 0.935064935064935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.96,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9723320158102766,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9534883720930233,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.046658262610435486,
"eval_overall_accuracy": 0.9890200384298655,
"eval_overall_f1": 0.9457755359394704,
"eval_overall_precision": 0.9493670886075949,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.6386,
"eval_samples_per_second": 266.194,
"eval_steps_per_second": 4.698,
"step": 3552
},
{
"epoch": 38.0,
"grad_norm": 0.2393975853919983,
"learning_rate": 3.1e-05,
"loss": 0.0193,
"step": 3648
},
{
"epoch": 38.0,
"eval_LOCATION_f1": 0.9446808510638298,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9327731092436975,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9411764705882353,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.972972972972973,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9800796812749005,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04305936023592949,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9545454545454546,
"eval_overall_precision": 0.9593908629441624,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.6482,
"eval_samples_per_second": 262.279,
"eval_steps_per_second": 4.628,
"step": 3648
},
{
"epoch": 39.0,
"grad_norm": 0.14480465650558472,
"learning_rate": 3.05e-05,
"loss": 0.0187,
"step": 3744
},
{
"epoch": 39.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9306930693069307,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9724137931034482,
"eval_ORGANIZATION_recall": 0.8924050632911392,
"eval_PERSON_f1": 0.9761904761904763,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04795057699084282,
"eval_overall_accuracy": 0.9884710403513588,
"eval_overall_f1": 0.946969696969697,
"eval_overall_precision": 0.9517766497461929,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.6557,
"eval_samples_per_second": 259.265,
"eval_steps_per_second": 4.575,
"step": 3744
},
{
"epoch": 40.0,
"grad_norm": 0.5256401896476746,
"learning_rate": 3e-05,
"loss": 0.0189,
"step": 3840
},
{
"epoch": 40.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9342105263157895,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9726027397260274,
"eval_ORGANIZATION_recall": 0.8987341772151899,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03981956094503403,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9481668773704173,
"eval_overall_precision": 0.9541984732824428,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.6564,
"eval_samples_per_second": 258.992,
"eval_steps_per_second": 4.57,
"step": 3840
},
{
"epoch": 41.0,
"grad_norm": 0.17553403973579407,
"learning_rate": 2.95e-05,
"loss": 0.0163,
"step": 3936
},
{
"epoch": 41.0,
"eval_LOCATION_f1": 0.9316239316239315,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.923728813559322,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9354838709677419,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9539473684210527,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.039753276854753494,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.947103274559194,
"eval_overall_precision": 0.9494949494949495,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.6452,
"eval_samples_per_second": 263.471,
"eval_steps_per_second": 4.649,
"step": 3936
},
{
"epoch": 42.0,
"grad_norm": 0.05329589918255806,
"learning_rate": 2.9e-05,
"loss": 0.0181,
"step": 4032
},
{
"epoch": 42.0,
"eval_LOCATION_f1": 0.95,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9193548387096774,
"eval_LOCATION_recall": 0.9827586206896551,
"eval_ORGANIZATION_f1": 0.9271523178807947,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9722222222222222,
"eval_ORGANIZATION_recall": 0.8860759493670886,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04664554074406624,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9532237673830594,
"eval_overall_precision": 0.9592875318066157,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.6419,
"eval_samples_per_second": 264.832,
"eval_steps_per_second": 4.674,
"step": 4032
},
{
"epoch": 43.0,
"grad_norm": 0.8441613912582397,
"learning_rate": 2.8499999999999998e-05,
"loss": 0.0153,
"step": 4128
},
{
"epoch": 43.0,
"eval_LOCATION_f1": 0.9361702127659575,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9243697478991597,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9311475409836065,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9659863945578231,
"eval_ORGANIZATION_recall": 0.8987341772151899,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04511801898479462,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9505703422053232,
"eval_overall_precision": 0.959079283887468,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.7562,
"eval_samples_per_second": 224.818,
"eval_steps_per_second": 3.967,
"step": 4128
},
{
"epoch": 44.0,
"grad_norm": 0.4699605703353882,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.017,
"step": 4224
},
{
"epoch": 44.0,
"eval_LOCATION_f1": 0.9406779661016949,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.925,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9294871794871796,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9415584415584416,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04654153808951378,
"eval_overall_accuracy": 0.9884710403513588,
"eval_overall_f1": 0.9485570890840652,
"eval_overall_precision": 0.9473684210526315,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.6307,
"eval_samples_per_second": 269.55,
"eval_steps_per_second": 4.757,
"step": 4224
},
{
"epoch": 45.0,
"grad_norm": 0.8079507350921631,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.0159,
"step": 4320
},
{
"epoch": 45.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.934640522875817,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9662162162162162,
"eval_ORGANIZATION_recall": 0.9050632911392406,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04502361640334129,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.9481668773704173,
"eval_overall_precision": 0.9541984732824428,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.6336,
"eval_samples_per_second": 268.323,
"eval_steps_per_second": 4.735,
"step": 4320
},
{
"epoch": 46.0,
"grad_norm": 0.20807619392871857,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.0156,
"step": 4416
},
{
"epoch": 46.0,
"eval_LOCATION_f1": 0.9451476793248945,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9256198347107438,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9281045751633986,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9594594594594594,
"eval_ORGANIZATION_recall": 0.8987341772151899,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.049771908670663834,
"eval_overall_accuracy": 0.9890200384298655,
"eval_overall_f1": 0.9494949494949495,
"eval_overall_precision": 0.9543147208121827,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.648,
"eval_samples_per_second": 262.345,
"eval_steps_per_second": 4.63,
"step": 4416
},
{
"epoch": 47.0,
"grad_norm": 0.4664674997329712,
"learning_rate": 2.6500000000000004e-05,
"loss": 0.0168,
"step": 4512
},
{
"epoch": 47.0,
"eval_LOCATION_f1": 0.9361702127659575,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9243697478991597,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9385113268608414,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9602649006622517,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.047012705355882645,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9496221662468514,
"eval_overall_precision": 0.952020202020202,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.6464,
"eval_samples_per_second": 263.015,
"eval_steps_per_second": 4.641,
"step": 4512
},
{
"epoch": 48.0,
"grad_norm": 0.5088257789611816,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.0146,
"step": 4608
},
{
"epoch": 48.0,
"eval_LOCATION_f1": 0.9406779661016949,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.925,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9381107491856678,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9664429530201343,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.045517031103372574,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9545454545454546,
"eval_overall_precision": 0.9593908629441624,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.6345,
"eval_samples_per_second": 267.916,
"eval_steps_per_second": 4.728,
"step": 4608
},
{
"epoch": 49.0,
"grad_norm": 0.4384995102882385,
"learning_rate": 2.5500000000000003e-05,
"loss": 0.013,
"step": 4704
},
{
"epoch": 49.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.954248366013072,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9864864864864865,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04442460089921951,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9570707070707072,
"eval_overall_precision": 0.9619289340101523,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6446,
"eval_samples_per_second": 263.735,
"eval_steps_per_second": 4.654,
"step": 4704
},
{
"epoch": 50.0,
"grad_norm": 0.7813078761100769,
"learning_rate": 2.5e-05,
"loss": 0.0142,
"step": 4800
},
{
"epoch": 50.0,
"eval_LOCATION_f1": 0.9237288135593221,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9083333333333333,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04556039720773697,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9546599496221662,
"eval_overall_precision": 0.9570707070707071,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6598,
"eval_samples_per_second": 257.672,
"eval_steps_per_second": 4.547,
"step": 4800
},
{
"epoch": 51.0,
"grad_norm": 0.4199505150318146,
"learning_rate": 2.45e-05,
"loss": 0.012,
"step": 4896
},
{
"epoch": 51.0,
"eval_LOCATION_f1": 0.9406779661016949,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.925,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9442622950819672,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9795918367346939,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.044391099363565445,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9569620253164557,
"eval_overall_precision": 0.9642857142857143,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.636,
"eval_samples_per_second": 267.283,
"eval_steps_per_second": 4.717,
"step": 4896
},
{
"epoch": 52.0,
"grad_norm": 0.49055948853492737,
"learning_rate": 2.4e-05,
"loss": 0.0141,
"step": 4992
},
{
"epoch": 52.0,
"eval_LOCATION_f1": 0.9451476793248945,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9256198347107438,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9415584415584415,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9666666666666667,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04581276699900627,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9571788413098236,
"eval_overall_precision": 0.9595959595959596,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6478,
"eval_samples_per_second": 262.415,
"eval_steps_per_second": 4.631,
"step": 4992
},
{
"epoch": 53.0,
"grad_norm": 0.16503672301769257,
"learning_rate": 2.35e-05,
"loss": 0.0121,
"step": 5088
},
{
"epoch": 53.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9381107491856678,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9664429530201343,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.046965762972831726,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9508196721311476,
"eval_overall_precision": 0.9544303797468354,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.6401,
"eval_samples_per_second": 265.568,
"eval_steps_per_second": 4.686,
"step": 5088
},
{
"epoch": 54.0,
"grad_norm": 0.016472522169351578,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.0121,
"step": 5184
},
{
"epoch": 54.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9329073482428115,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9419354838709677,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.046427227556705475,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9498746867167919,
"eval_overall_precision": 0.9475,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6476,
"eval_samples_per_second": 262.498,
"eval_steps_per_second": 4.632,
"step": 5184
},
{
"epoch": 55.0,
"grad_norm": 0.8974384069442749,
"learning_rate": 2.25e-05,
"loss": 0.0114,
"step": 5280
},
{
"epoch": 55.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9442622950819672,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9795918367346939,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9800796812749005,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.0443260557949543,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9508196721311476,
"eval_overall_precision": 0.9544303797468354,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.6318,
"eval_samples_per_second": 269.083,
"eval_steps_per_second": 4.749,
"step": 5280
},
{
"epoch": 56.0,
"grad_norm": 0.4733685553073883,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.0124,
"step": 5376
},
{
"epoch": 56.0,
"eval_LOCATION_f1": 0.9237288135593221,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9083333333333333,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9483870967741935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9671052631578947,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9761904761904763,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9609375,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04666825383901596,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9498746867167919,
"eval_overall_precision": 0.9475,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.7135,
"eval_samples_per_second": 238.246,
"eval_steps_per_second": 4.204,
"step": 5376
},
{
"epoch": 57.0,
"grad_norm": 0.35932302474975586,
"learning_rate": 2.15e-05,
"loss": 0.0125,
"step": 5472
},
{
"epoch": 57.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9446254071661238,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9731543624161074,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04612690582871437,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9521410579345088,
"eval_overall_precision": 0.9545454545454546,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.65,
"eval_samples_per_second": 261.533,
"eval_steps_per_second": 4.615,
"step": 5472
},
{
"epoch": 58.0,
"grad_norm": 0.02410867251455784,
"learning_rate": 2.1e-05,
"loss": 0.0106,
"step": 5568
},
{
"epoch": 58.0,
"eval_LOCATION_f1": 0.9198312236286921,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9008264462809917,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9342105263157895,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9726027397260274,
"eval_ORGANIZATION_recall": 0.8987341772151899,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.0608096569776535,
"eval_overall_accuracy": 0.9876475432335987,
"eval_overall_f1": 0.943109987357775,
"eval_overall_precision": 0.9491094147582697,
"eval_overall_recall": 0.9371859296482412,
"eval_runtime": 0.7068,
"eval_samples_per_second": 240.534,
"eval_steps_per_second": 4.245,
"step": 5568
},
{
"epoch": 59.0,
"grad_norm": 0.08885292708873749,
"learning_rate": 2.05e-05,
"loss": 0.0113,
"step": 5664
},
{
"epoch": 59.0,
"eval_LOCATION_f1": 0.9406779661016949,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.925,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9411764705882353,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.972972972972973,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04704877734184265,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9532237673830594,
"eval_overall_precision": 0.9592875318066157,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.6412,
"eval_samples_per_second": 265.119,
"eval_steps_per_second": 4.679,
"step": 5664
},
{
"epoch": 60.0,
"grad_norm": 0.16123533248901367,
"learning_rate": 2e-05,
"loss": 0.0104,
"step": 5760
},
{
"epoch": 60.0,
"eval_LOCATION_f1": 0.9406779661016949,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.925,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9576547231270359,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9865771812080537,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04556664079427719,
"eval_overall_accuracy": 0.9917650288223991,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.6451,
"eval_samples_per_second": 263.515,
"eval_steps_per_second": 4.65,
"step": 5760
},
{
"epoch": 61.0,
"grad_norm": 0.2441156953573227,
"learning_rate": 1.9500000000000003e-05,
"loss": 0.0117,
"step": 5856
},
{
"epoch": 61.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9407894736842106,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9794520547945206,
"eval_ORGANIZATION_recall": 0.9050632911392406,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.05170726031064987,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9505703422053232,
"eval_overall_precision": 0.959079283887468,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.6477,
"eval_samples_per_second": 262.455,
"eval_steps_per_second": 4.632,
"step": 5856
},
{
"epoch": 62.0,
"grad_norm": 1.3389254808425903,
"learning_rate": 1.9e-05,
"loss": 0.0117,
"step": 5952
},
{
"epoch": 62.0,
"eval_LOCATION_f1": 0.9237288135593221,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9083333333333333,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.046722497791051865,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9521410579345088,
"eval_overall_precision": 0.9545454545454546,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.6347,
"eval_samples_per_second": 267.826,
"eval_steps_per_second": 4.726,
"step": 5952
},
{
"epoch": 63.0,
"grad_norm": 0.06463531404733658,
"learning_rate": 1.85e-05,
"loss": 0.0117,
"step": 6048
},
{
"epoch": 63.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9377049180327869,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9727891156462585,
"eval_ORGANIZATION_recall": 0.9050632911392406,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.05383671075105667,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9506953223767383,
"eval_overall_precision": 0.9567430025445293,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.6463,
"eval_samples_per_second": 263.026,
"eval_steps_per_second": 4.642,
"step": 6048
},
{
"epoch": 64.0,
"grad_norm": 0.44926658272743225,
"learning_rate": 1.8e-05,
"loss": 0.0096,
"step": 6144
},
{
"epoch": 64.0,
"eval_LOCATION_f1": 0.923076923076923,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9152542372881356,
"eval_LOCATION_recall": 0.9310344827586207,
"eval_ORGANIZATION_f1": 0.9315960912052117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.959731543624161,
"eval_ORGANIZATION_recall": 0.9050632911392406,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.06059302017092705,
"eval_overall_accuracy": 0.9887455393906122,
"eval_overall_f1": 0.943109987357775,
"eval_overall_precision": 0.9491094147582697,
"eval_overall_recall": 0.9371859296482412,
"eval_runtime": 0.6446,
"eval_samples_per_second": 263.71,
"eval_steps_per_second": 4.654,
"step": 6144
},
{
"epoch": 65.0,
"grad_norm": 0.41238224506378174,
"learning_rate": 1.75e-05,
"loss": 0.0087,
"step": 6240
},
{
"epoch": 65.0,
"eval_LOCATION_f1": 0.9237288135593221,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9083333333333333,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9473684210526315,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9863013698630136,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.05515790358185768,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9505703422053232,
"eval_overall_precision": 0.959079283887468,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.6403,
"eval_samples_per_second": 265.48,
"eval_steps_per_second": 4.685,
"step": 6240
},
{
"epoch": 66.0,
"grad_norm": 0.14979392290115356,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.0105,
"step": 6336
},
{
"epoch": 66.0,
"eval_LOCATION_f1": 0.9276595744680851,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9159663865546218,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9381107491856678,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9664429530201343,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.05218491330742836,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9481668773704173,
"eval_overall_precision": 0.9541984732824428,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.6431,
"eval_samples_per_second": 264.344,
"eval_steps_per_second": 4.665,
"step": 6336
},
{
"epoch": 67.0,
"grad_norm": 1.044791579246521,
"learning_rate": 1.65e-05,
"loss": 0.0097,
"step": 6432
},
{
"epoch": 67.0,
"eval_LOCATION_f1": 0.9406779661016949,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.925,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9607843137254902,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9932432432432432,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04917807877063751,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.9608091024020228,
"eval_overall_precision": 0.9669211195928753,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6423,
"eval_samples_per_second": 264.692,
"eval_steps_per_second": 4.671,
"step": 6432
},
{
"epoch": 68.0,
"grad_norm": 0.7401499152183533,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0096,
"step": 6528
},
{
"epoch": 68.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9442622950819672,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9795918367346939,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.052167050540447235,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9518987341772152,
"eval_overall_precision": 0.9591836734693877,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.6398,
"eval_samples_per_second": 265.708,
"eval_steps_per_second": 4.689,
"step": 6528
},
{
"epoch": 69.0,
"grad_norm": 0.19779767096042633,
"learning_rate": 1.55e-05,
"loss": 0.0092,
"step": 6624
},
{
"epoch": 69.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.954248366013072,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9864864864864865,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.05200101062655449,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9545454545454546,
"eval_overall_precision": 0.9593908629441624,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.6403,
"eval_samples_per_second": 265.505,
"eval_steps_per_second": 4.685,
"step": 6624
},
{
"epoch": 70.0,
"grad_norm": 0.029442617669701576,
"learning_rate": 1.5e-05,
"loss": 0.008,
"step": 6720
},
{
"epoch": 70.0,
"eval_LOCATION_f1": 0.9406779661016949,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.925,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04805196449160576,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9571788413098236,
"eval_overall_precision": 0.9595959595959596,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6445,
"eval_samples_per_second": 263.778,
"eval_steps_per_second": 4.655,
"step": 6720
},
{
"epoch": 71.0,
"grad_norm": 0.21900275349617004,
"learning_rate": 1.45e-05,
"loss": 0.0085,
"step": 6816
},
{
"epoch": 71.0,
"eval_LOCATION_f1": 0.9411764705882353,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9180327868852459,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.954248366013072,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9864864864864865,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04967062175273895,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9609079445145018,
"eval_overall_precision": 0.9645569620253165,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.6448,
"eval_samples_per_second": 263.653,
"eval_steps_per_second": 4.653,
"step": 6816
},
{
"epoch": 72.0,
"grad_norm": 0.07786503434181213,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.0105,
"step": 6912
},
{
"epoch": 72.0,
"eval_LOCATION_f1": 0.9310344827586207,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9310344827586207,
"eval_LOCATION_recall": 0.9310344827586207,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9721115537848605,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9606299212598425,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04753611981868744,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.952020202020202,
"eval_overall_precision": 0.9568527918781726,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.7078,
"eval_samples_per_second": 240.174,
"eval_steps_per_second": 4.238,
"step": 6912
},
{
"epoch": 73.0,
"grad_norm": 0.6542889475822449,
"learning_rate": 1.3500000000000001e-05,
"loss": 0.0089,
"step": 7008
},
{
"epoch": 73.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04598424956202507,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9559748427672956,
"eval_overall_precision": 0.9571788413098237,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6464,
"eval_samples_per_second": 263.003,
"eval_steps_per_second": 4.641,
"step": 7008
},
{
"epoch": 74.0,
"grad_norm": 0.036601122468709946,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.009,
"step": 7104
},
{
"epoch": 74.0,
"eval_LOCATION_f1": 0.9276595744680851,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9159663865546218,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.047892238944768906,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9546599496221662,
"eval_overall_precision": 0.9570707070707071,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6433,
"eval_samples_per_second": 264.273,
"eval_steps_per_second": 4.664,
"step": 7104
},
{
"epoch": 75.0,
"grad_norm": 0.2045198231935501,
"learning_rate": 1.25e-05,
"loss": 0.0077,
"step": 7200
},
{
"epoch": 75.0,
"eval_LOCATION_f1": 0.9327731092436975,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9098360655737705,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9545454545454545,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.98,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04896987974643707,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9547738693467337,
"eval_overall_precision": 0.9547738693467337,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6474,
"eval_samples_per_second": 262.578,
"eval_steps_per_second": 4.634,
"step": 7200
},
{
"epoch": 76.0,
"grad_norm": 0.017027413472533226,
"learning_rate": 1.2e-05,
"loss": 0.0078,
"step": 7296
},
{
"epoch": 76.0,
"eval_LOCATION_f1": 0.9327731092436975,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9098360655737705,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9579288025889968,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9801324503311258,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04453570023179054,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9597989949748744,
"eval_overall_precision": 0.9597989949748744,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.6463,
"eval_samples_per_second": 263.039,
"eval_steps_per_second": 4.642,
"step": 7296
},
{
"epoch": 77.0,
"grad_norm": 0.07646860927343369,
"learning_rate": 1.1500000000000002e-05,
"loss": 0.007,
"step": 7392
},
{
"epoch": 77.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04591028392314911,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.9546599496221662,
"eval_overall_precision": 0.9570707070707071,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6677,
"eval_samples_per_second": 254.59,
"eval_steps_per_second": 4.493,
"step": 7392
},
{
"epoch": 78.0,
"grad_norm": 0.3783327043056488,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.0071,
"step": 7488
},
{
"epoch": 78.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9385113268608414,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9602649006622517,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.047588180750608444,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.9484276729559749,
"eval_overall_precision": 0.9496221662468514,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.6365,
"eval_samples_per_second": 267.065,
"eval_steps_per_second": 4.713,
"step": 7488
},
{
"epoch": 79.0,
"grad_norm": 0.011126444675028324,
"learning_rate": 1.05e-05,
"loss": 0.0073,
"step": 7584
},
{
"epoch": 79.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.048617471009492874,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9559748427672956,
"eval_overall_precision": 0.9571788413098237,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6524,
"eval_samples_per_second": 260.589,
"eval_steps_per_second": 4.599,
"step": 7584
},
{
"epoch": 80.0,
"grad_norm": 0.05649706348776817,
"learning_rate": 1e-05,
"loss": 0.0075,
"step": 7680
},
{
"epoch": 80.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04871730878949165,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9559748427672956,
"eval_overall_precision": 0.9571788413098237,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6486,
"eval_samples_per_second": 262.118,
"eval_steps_per_second": 4.626,
"step": 7680
},
{
"epoch": 81.0,
"grad_norm": 0.2810403108596802,
"learning_rate": 9.5e-06,
"loss": 0.0085,
"step": 7776
},
{
"epoch": 81.0,
"eval_LOCATION_f1": 0.9237288135593221,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9083333333333333,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.952076677316294,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9612903225806452,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04575835540890694,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9548872180451128,
"eval_overall_precision": 0.9525,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.6447,
"eval_samples_per_second": 263.707,
"eval_steps_per_second": 4.654,
"step": 7776
},
{
"epoch": 82.0,
"grad_norm": 0.7172192335128784,
"learning_rate": 9e-06,
"loss": 0.0075,
"step": 7872
},
{
"epoch": 82.0,
"eval_LOCATION_f1": 0.9327731092436975,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9098360655737705,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9612903225806452,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9802631578947368,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04576858505606651,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9611041405269761,
"eval_overall_precision": 0.9598997493734336,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.6629,
"eval_samples_per_second": 256.442,
"eval_steps_per_second": 4.525,
"step": 7872
},
{
"epoch": 83.0,
"grad_norm": 0.01395861990749836,
"learning_rate": 8.500000000000002e-06,
"loss": 0.0069,
"step": 7968
},
{
"epoch": 83.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9576547231270359,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9865771812080537,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04589336737990379,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9583858764186632,
"eval_overall_precision": 0.9620253164556962,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6513,
"eval_samples_per_second": 260.997,
"eval_steps_per_second": 4.606,
"step": 7968
},
{
"epoch": 84.0,
"grad_norm": 0.02708912268280983,
"learning_rate": 8.000000000000001e-06,
"loss": 0.006,
"step": 8064
},
{
"epoch": 84.0,
"eval_LOCATION_f1": 0.9411764705882353,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9180327868852459,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9612903225806452,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9802631578947368,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04835737869143486,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9636135508155583,
"eval_overall_precision": 0.9624060150375939,
"eval_overall_recall": 0.964824120603015,
"eval_runtime": 0.6317,
"eval_samples_per_second": 269.107,
"eval_steps_per_second": 4.749,
"step": 8064
},
{
"epoch": 85.0,
"grad_norm": 0.20577897131443024,
"learning_rate": 7.5e-06,
"loss": 0.0074,
"step": 8160
},
{
"epoch": 85.0,
"eval_LOCATION_f1": 0.9451476793248945,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9256198347107438,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9415584415584415,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9666666666666667,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.05014825612306595,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9546599496221662,
"eval_overall_precision": 0.9570707070707071,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6402,
"eval_samples_per_second": 265.524,
"eval_steps_per_second": 4.686,
"step": 8160
},
{
"epoch": 86.0,
"grad_norm": 0.07519058883190155,
"learning_rate": 7.000000000000001e-06,
"loss": 0.0066,
"step": 8256
},
{
"epoch": 86.0,
"eval_LOCATION_f1": 0.9327731092436975,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9098360655737705,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.948051948051948,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9733333333333334,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04891950264573097,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.9559748427672956,
"eval_overall_precision": 0.9571788413098237,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6406,
"eval_samples_per_second": 265.379,
"eval_steps_per_second": 4.683,
"step": 8256
},
{
"epoch": 87.0,
"grad_norm": 0.21884454786777496,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0073,
"step": 8352
},
{
"epoch": 87.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9579288025889968,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9801324503311258,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.046904049813747406,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9610062893081761,
"eval_overall_precision": 0.9622166246851386,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.645,
"eval_samples_per_second": 263.559,
"eval_steps_per_second": 4.651,
"step": 8352
},
{
"epoch": 88.0,
"grad_norm": 0.21439653635025024,
"learning_rate": 6e-06,
"loss": 0.0064,
"step": 8448
},
{
"epoch": 88.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9579288025889968,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9801324503311258,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04628141596913338,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9584905660377357,
"eval_overall_precision": 0.9596977329974811,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.6879,
"eval_samples_per_second": 247.12,
"eval_steps_per_second": 4.361,
"step": 8448
},
{
"epoch": 89.0,
"grad_norm": 0.8145310878753662,
"learning_rate": 5.500000000000001e-06,
"loss": 0.0055,
"step": 8544
},
{
"epoch": 89.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9579288025889968,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9801324503311258,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.048927295953035355,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.957286432160804,
"eval_overall_precision": 0.957286432160804,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.6381,
"eval_samples_per_second": 266.411,
"eval_steps_per_second": 4.701,
"step": 8544
},
{
"epoch": 90.0,
"grad_norm": 0.35854557156562805,
"learning_rate": 5e-06,
"loss": 0.0068,
"step": 8640
},
{
"epoch": 90.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04757599160075188,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9534591194968555,
"eval_overall_precision": 0.9546599496221663,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6424,
"eval_samples_per_second": 264.629,
"eval_steps_per_second": 4.67,
"step": 8640
},
{
"epoch": 91.0,
"grad_norm": 0.07453371584415436,
"learning_rate": 4.5e-06,
"loss": 0.0066,
"step": 8736
},
{
"epoch": 91.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9612903225806452,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9802631578947368,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.046665437519550323,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.9623115577889447,
"eval_overall_precision": 0.9623115577889447,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.6328,
"eval_samples_per_second": 268.657,
"eval_steps_per_second": 4.741,
"step": 8736
},
{
"epoch": 92.0,
"grad_norm": 1.4774054288864136,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0069,
"step": 8832
},
{
"epoch": 92.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9511400651465798,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9798657718120806,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04833124950528145,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.953341740226986,
"eval_overall_precision": 0.9569620253164557,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.6433,
"eval_samples_per_second": 264.243,
"eval_steps_per_second": 4.663,
"step": 8832
},
{
"epoch": 93.0,
"grad_norm": 0.516577959060669,
"learning_rate": 3.5000000000000004e-06,
"loss": 0.0064,
"step": 8928
},
{
"epoch": 93.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04831400513648987,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9547738693467337,
"eval_overall_precision": 0.9547738693467337,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.645,
"eval_samples_per_second": 263.581,
"eval_steps_per_second": 4.651,
"step": 8928
},
{
"epoch": 94.0,
"grad_norm": 0.021556708961725235,
"learning_rate": 3e-06,
"loss": 0.0065,
"step": 9024
},
{
"epoch": 94.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04879750683903694,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.957286432160804,
"eval_overall_precision": 0.957286432160804,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.6493,
"eval_samples_per_second": 261.815,
"eval_steps_per_second": 4.62,
"step": 9024
},
{
"epoch": 95.0,
"grad_norm": 0.10190509259700775,
"learning_rate": 2.5e-06,
"loss": 0.0059,
"step": 9120
},
{
"epoch": 95.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9576547231270359,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9865771812080537,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04920661076903343,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9558638083228247,
"eval_overall_precision": 0.959493670886076,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.6424,
"eval_samples_per_second": 264.646,
"eval_steps_per_second": 4.67,
"step": 9120
},
{
"epoch": 96.0,
"grad_norm": 0.015212666243314743,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0054,
"step": 9216
},
{
"epoch": 96.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04876454174518585,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9535759096612295,
"eval_overall_precision": 0.9523809523809523,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6531,
"eval_samples_per_second": 260.301,
"eval_steps_per_second": 4.594,
"step": 9216
},
{
"epoch": 97.0,
"grad_norm": 0.02505590207874775,
"learning_rate": 1.5e-06,
"loss": 0.0062,
"step": 9312
},
{
"epoch": 97.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.961038961038961,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9866666666666667,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04920656606554985,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9559748427672956,
"eval_overall_precision": 0.9571788413098237,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6379,
"eval_samples_per_second": 266.497,
"eval_steps_per_second": 4.703,
"step": 9312
},
{
"epoch": 98.0,
"grad_norm": 0.1335475891828537,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0064,
"step": 9408
},
{
"epoch": 98.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9579288025889968,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9801324503311258,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.049772508442401886,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9547738693467337,
"eval_overall_precision": 0.9547738693467337,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.645,
"eval_samples_per_second": 263.57,
"eval_steps_per_second": 4.651,
"step": 9408
},
{
"epoch": 99.0,
"grad_norm": 0.15670600533485413,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0058,
"step": 9504
},
{
"epoch": 99.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9579288025889968,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9801324503311258,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.049256470054388046,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9547738693467337,
"eval_overall_precision": 0.9547738693467337,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6514,
"eval_samples_per_second": 260.981,
"eval_steps_per_second": 4.606,
"step": 9504
},
{
"epoch": 100.0,
"grad_norm": 0.9807391166687012,
"learning_rate": 0.0,
"loss": 0.0059,
"step": 9600
},
{
"epoch": 100.0,
"eval_LOCATION_f1": 0.9282700421940928,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9090909090909091,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9579288025889968,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9801324503311258,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.049376122653484344,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9559748427672956,
"eval_overall_precision": 0.9571788413098237,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.6514,
"eval_samples_per_second": 260.977,
"eval_steps_per_second": 4.605,
"step": 9600
},
{
"epoch": 100.0,
"step": 9600,
"total_flos": 4293597283743744.0,
"train_loss": 0.035835961190362774,
"train_runtime": 1119.0892,
"train_samples_per_second": 136.808,
"train_steps_per_second": 8.578
}
],
"logging_steps": 500,
"max_steps": 9600,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 4293597283743744.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}