|
{ |
|
"best_metric": 0.9323447636700648, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4333", |
|
"epoch": 9.989615784008308, |
|
"eval_steps": 500, |
|
"global_step": 4810, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9989615784008308, |
|
"eval_accuracy": 0.9987441743644127, |
|
"eval_f1": 0.9291628334866606, |
|
"eval_loss": 0.004182814620435238, |
|
"eval_precision": 0.9173478655767484, |
|
"eval_recall": 0.9412861136999068, |
|
"eval_runtime": 15.1684, |
|
"eval_samples_per_second": 457.925, |
|
"eval_steps_per_second": 57.29, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.0384215991692627, |
|
"grad_norm": 0.07515838742256165, |
|
"learning_rate": 4.48024948024948e-05, |
|
"loss": 0.0156, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9986464990372004, |
|
"eval_f1": 0.9189439555349699, |
|
"eval_loss": 0.004935940261930227, |
|
"eval_precision": 0.9134438305709024, |
|
"eval_recall": 0.9245107176141659, |
|
"eval_runtime": 15.18, |
|
"eval_samples_per_second": 457.575, |
|
"eval_steps_per_second": 57.246, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.0768431983385254, |
|
"grad_norm": 0.005299085285514593, |
|
"learning_rate": 3.9604989604989604e-05, |
|
"loss": 0.0039, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.9989615784008308, |
|
"eval_accuracy": 0.9985627773281612, |
|
"eval_f1": 0.9191873589164785, |
|
"eval_loss": 0.005292736925184727, |
|
"eval_precision": 0.8914185639229422, |
|
"eval_recall": 0.9487418452935694, |
|
"eval_runtime": 15.1155, |
|
"eval_samples_per_second": 459.529, |
|
"eval_steps_per_second": 57.491, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 3.115264797507788, |
|
"grad_norm": 0.06427361071109772, |
|
"learning_rate": 3.4407484407484405e-05, |
|
"loss": 0.0024, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9984720788100354, |
|
"eval_f1": 0.9167412712623096, |
|
"eval_loss": 0.0061088865622878075, |
|
"eval_precision": 0.8819982773471146, |
|
"eval_recall": 0.9543336439888164, |
|
"eval_runtime": 15.4198, |
|
"eval_samples_per_second": 450.46, |
|
"eval_steps_per_second": 56.356, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 4.153686396677051, |
|
"grad_norm": 0.04428843781352043, |
|
"learning_rate": 2.920997920997921e-05, |
|
"loss": 0.0017, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.998961578400831, |
|
"eval_accuracy": 0.9986046381826807, |
|
"eval_f1": 0.9254284390921722, |
|
"eval_loss": 0.007444376591593027, |
|
"eval_precision": 0.919889502762431, |
|
"eval_recall": 0.9310344827586207, |
|
"eval_runtime": 15.1205, |
|
"eval_samples_per_second": 459.377, |
|
"eval_steps_per_second": 57.472, |
|
"step": 2407 |
|
}, |
|
{ |
|
"epoch": 5.192107995846314, |
|
"grad_norm": 0.004208261147141457, |
|
"learning_rate": 2.4012474012474013e-05, |
|
"loss": 0.0011, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9985767309463344, |
|
"eval_f1": 0.9266943291839557, |
|
"eval_loss": 0.007942954078316689, |
|
"eval_precision": 0.916970802919708, |
|
"eval_recall": 0.9366262814538676, |
|
"eval_runtime": 15.1887, |
|
"eval_samples_per_second": 457.314, |
|
"eval_steps_per_second": 57.214, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 6.230529595015576, |
|
"grad_norm": 0.004876282997429371, |
|
"learning_rate": 1.8814968814968818e-05, |
|
"loss": 0.0007, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.998961578400831, |
|
"eval_accuracy": 0.9986744062735468, |
|
"eval_f1": 0.9254004576659038, |
|
"eval_loss": 0.006659395061433315, |
|
"eval_precision": 0.9091726618705036, |
|
"eval_recall": 0.9422180801491147, |
|
"eval_runtime": 15.1599, |
|
"eval_samples_per_second": 458.182, |
|
"eval_steps_per_second": 57.322, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 7.268951194184839, |
|
"grad_norm": 0.0032651671208441257, |
|
"learning_rate": 1.3617463617463619e-05, |
|
"loss": 0.0005, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9986674294644602, |
|
"eval_f1": 0.9275092936802974, |
|
"eval_loss": 0.007326104678213596, |
|
"eval_precision": 0.9249304911955515, |
|
"eval_recall": 0.9301025163094129, |
|
"eval_runtime": 15.1332, |
|
"eval_samples_per_second": 458.991, |
|
"eval_steps_per_second": 57.423, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 8.307372793354102, |
|
"grad_norm": 0.0013715826207771897, |
|
"learning_rate": 8.419958419958421e-06, |
|
"loss": 0.0004, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.99896157840083, |
|
"eval_accuracy": 0.9987162671280663, |
|
"eval_f1": 0.9323447636700648, |
|
"eval_loss": 0.008018395863473415, |
|
"eval_precision": 0.9271889400921659, |
|
"eval_recall": 0.9375582479030755, |
|
"eval_runtime": 15.4151, |
|
"eval_samples_per_second": 450.598, |
|
"eval_steps_per_second": 56.373, |
|
"step": 4333 |
|
}, |
|
{ |
|
"epoch": 9.345794392523365, |
|
"grad_norm": 0.005300257820636034, |
|
"learning_rate": 3.2224532224532228e-06, |
|
"loss": 0.0002, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.989615784008308, |
|
"eval_accuracy": 0.9987092903189797, |
|
"eval_f1": 0.9315448658649399, |
|
"eval_loss": 0.007857992313802242, |
|
"eval_precision": 0.9247015610651974, |
|
"eval_recall": 0.9384902143522833, |
|
"eval_runtime": 15.1843, |
|
"eval_samples_per_second": 457.447, |
|
"eval_steps_per_second": 57.23, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 9.989615784008308, |
|
"step": 4810, |
|
"total_flos": 1.3132946802550608e+16, |
|
"train_loss": 0.0027612092573652642, |
|
"train_runtime": 2084.9871, |
|
"train_samples_per_second": 147.78, |
|
"train_steps_per_second": 2.307 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4810, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3132946802550608e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|