{ "best_metric": 0.9093852639198303, "best_model_checkpoint": "GeoBERT/checkpoint-500", "epoch": 2.0, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 9.578972816467285, "learning_rate": 4.4e-06, "loss": 1.3411, "step": 11 }, { "epoch": 0.09, "grad_norm": 10.498757362365723, "learning_rate": 8.8e-06, "loss": 1.2504, "step": 22 }, { "epoch": 0.13, "grad_norm": 6.937590599060059, "learning_rate": 1.32e-05, "loss": 1.234, "step": 33 }, { "epoch": 0.18, "grad_norm": 4.935857772827148, "learning_rate": 1.76e-05, "loss": 1.1156, "step": 44 }, { "epoch": 0.22, "grad_norm": 8.856083869934082, "learning_rate": 2.2000000000000003e-05, "loss": 0.9942, "step": 55 }, { "epoch": 0.26, "grad_norm": 7.843554973602295, "learning_rate": 2.64e-05, "loss": 0.9665, "step": 66 }, { "epoch": 0.31, "grad_norm": 7.1852126121521, "learning_rate": 3.08e-05, "loss": 1.1064, "step": 77 }, { "epoch": 0.35, "grad_norm": 11.667104721069336, "learning_rate": 3.52e-05, "loss": 1.1114, "step": 88 }, { "epoch": 0.4, "grad_norm": 10.513388633728027, "learning_rate": 3.960000000000001e-05, "loss": 0.9571, "step": 99 }, { "epoch": 0.44, "grad_norm": 9.239156723022461, "learning_rate": 4.4000000000000006e-05, "loss": 0.93, "step": 110 }, { "epoch": 0.48, "grad_norm": 6.3955302238464355, "learning_rate": 4.8e-05, "loss": 0.9277, "step": 121 }, { "epoch": 0.53, "grad_norm": 7.602996826171875, "learning_rate": 4.973333333333334e-05, "loss": 0.9696, "step": 132 }, { "epoch": 0.57, "grad_norm": 6.260980129241943, "learning_rate": 4.924444444444445e-05, "loss": 0.9792, "step": 143 }, { "epoch": 0.62, "grad_norm": 12.462403297424316, "learning_rate": 4.875555555555556e-05, "loss": 1.1474, "step": 154 }, { "epoch": 0.66, "grad_norm": 11.24273681640625, "learning_rate": 4.826666666666667e-05, "loss": 1.0853, "step": 165 }, { "epoch": 0.7, "grad_norm": 5.870580673217773, "learning_rate": 4.7777777777777784e-05, "loss": 1.0063, "step": 176 }, { "epoch": 0.75, "grad_norm": 7.315596580505371, "learning_rate": 4.728888888888889e-05, "loss": 0.8158, "step": 187 }, { "epoch": 0.79, "grad_norm": 9.08304214477539, "learning_rate": 4.6800000000000006e-05, "loss": 0.8146, "step": 198 }, { "epoch": 0.84, "grad_norm": 7.242616176605225, "learning_rate": 4.6311111111111113e-05, "loss": 0.881, "step": 209 }, { "epoch": 0.88, "grad_norm": 6.810551643371582, "learning_rate": 4.582222222222222e-05, "loss": 1.02, "step": 220 }, { "epoch": 0.92, "grad_norm": 9.252344131469727, "learning_rate": 4.5333333333333335e-05, "loss": 0.8745, "step": 231 }, { "epoch": 0.97, "grad_norm": 5.250942707061768, "learning_rate": 4.484444444444444e-05, "loss": 0.8428, "step": 242 }, { "epoch": 1.0, "eval_accuracy": 0.5794979079497908, "eval_f1_macro": 0.3207391363205631, "eval_f1_micro": 0.5794979079497908, "eval_f1_weighted": 0.5288849063689292, "eval_loss": 1.0058727264404297, "eval_precision_macro": 0.30283025619006726, "eval_precision_micro": 0.5794979079497908, "eval_precision_weighted": 0.5213967302096602, "eval_recall_macro": 0.37342191224507815, "eval_recall_micro": 0.5794979079497908, "eval_recall_weighted": 0.5794979079497908, "eval_runtime": 6.8009, "eval_samples_per_second": 70.284, "eval_steps_per_second": 4.411, "step": 250 }, { "epoch": 1.01, "grad_norm": 8.098525047302246, "learning_rate": 4.435555555555556e-05, "loss": 0.8954, "step": 253 }, { "epoch": 1.06, "grad_norm": 8.491247177124023, "learning_rate": 4.3866666666666665e-05, "loss": 0.6923, "step": 264 }, { "epoch": 1.1, "grad_norm": 8.242985725402832, "learning_rate": 4.337777777777778e-05, "loss": 0.7433, "step": 275 }, { "epoch": 1.14, "grad_norm": 9.606861114501953, "learning_rate": 4.2888888888888886e-05, "loss": 0.7633, "step": 286 }, { "epoch": 1.19, "grad_norm": 4.706633567810059, "learning_rate": 4.24e-05, "loss": 0.6287, "step": 297 }, { "epoch": 1.23, "grad_norm": 16.51137351989746, "learning_rate": 4.1911111111111115e-05, "loss": 0.6284, "step": 308 }, { "epoch": 1.28, "grad_norm": 13.114934921264648, "learning_rate": 4.142222222222222e-05, "loss": 0.8798, "step": 319 }, { "epoch": 1.32, "grad_norm": 10.896364212036133, "learning_rate": 4.093333333333334e-05, "loss": 0.6527, "step": 330 }, { "epoch": 1.36, "grad_norm": 7.2867913246154785, "learning_rate": 4.0444444444444444e-05, "loss": 0.5512, "step": 341 }, { "epoch": 1.41, "grad_norm": 6.902933597564697, "learning_rate": 3.995555555555556e-05, "loss": 0.7718, "step": 352 }, { "epoch": 1.45, "grad_norm": 6.432639122009277, "learning_rate": 3.9466666666666666e-05, "loss": 0.5573, "step": 363 }, { "epoch": 1.5, "grad_norm": 7.4120073318481445, "learning_rate": 3.897777777777778e-05, "loss": 0.6316, "step": 374 }, { "epoch": 1.54, "grad_norm": 4.322595596313477, "learning_rate": 3.848888888888889e-05, "loss": 0.6219, "step": 385 }, { "epoch": 1.58, "grad_norm": 5.752494812011719, "learning_rate": 3.8e-05, "loss": 0.5568, "step": 396 }, { "epoch": 1.63, "grad_norm": 13.906458854675293, "learning_rate": 3.7511111111111116e-05, "loss": 0.7011, "step": 407 }, { "epoch": 1.67, "grad_norm": 1.0408498048782349, "learning_rate": 3.7022222222222224e-05, "loss": 0.3148, "step": 418 }, { "epoch": 1.72, "grad_norm": 9.536336898803711, "learning_rate": 3.653333333333334e-05, "loss": 0.7209, "step": 429 }, { "epoch": 1.76, "grad_norm": 4.775065898895264, "learning_rate": 3.6044444444444446e-05, "loss": 0.637, "step": 440 }, { "epoch": 1.8, "grad_norm": 16.23617172241211, "learning_rate": 3.555555555555556e-05, "loss": 0.6103, "step": 451 }, { "epoch": 1.85, "grad_norm": 15.569254875183105, "learning_rate": 3.506666666666667e-05, "loss": 0.5131, "step": 462 }, { "epoch": 1.89, "grad_norm": 11.499606132507324, "learning_rate": 3.457777777777778e-05, "loss": 0.5525, "step": 473 }, { "epoch": 1.94, "grad_norm": 8.275940895080566, "learning_rate": 3.408888888888889e-05, "loss": 0.5643, "step": 484 }, { "epoch": 1.98, "grad_norm": 5.744302272796631, "learning_rate": 3.3600000000000004e-05, "loss": 0.4863, "step": 495 }, { "epoch": 2.0, "eval_accuracy": 0.6589958158995816, "eval_f1_macro": 0.5823605917694434, "eval_f1_micro": 0.6589958158995816, "eval_f1_weighted": 0.6565370798157575, "eval_loss": 0.9093852639198303, "eval_precision_macro": 0.6115906152670858, "eval_precision_micro": 0.6589958158995816, "eval_precision_weighted": 0.6611483212824585, "eval_recall_macro": 0.5656902456694988, "eval_recall_micro": 0.6589958158995816, "eval_recall_weighted": 0.6589958158995816, "eval_runtime": 6.8834, "eval_samples_per_second": 69.442, "eval_steps_per_second": 4.358, "step": 500 } ], "logging_steps": 11, "max_steps": 1250, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1051936888823808.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }