sentiment-pt-pl30-3 / trainer_state.json
apwic's picture
End of training
f835a20 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.9823102951049805,
"learning_rate": 4.75e-05,
"loss": 0.5456,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7468671679197995,
"eval_f1": 0.6641666041651041,
"eval_loss": 0.49161723256111145,
"eval_precision": 0.6912698412698413,
"eval_recall": 0.6533915257319513,
"eval_runtime": 1.7896,
"eval_samples_per_second": 222.952,
"eval_steps_per_second": 27.939,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 2.931857109069824,
"learning_rate": 4.5e-05,
"loss": 0.4369,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.8120300751879699,
"eval_f1": 0.7912087912087912,
"eval_loss": 0.41080209612846375,
"eval_precision": 0.7802706552706553,
"eval_recall": 0.822013093289689,
"eval_runtime": 1.8221,
"eval_samples_per_second": 218.982,
"eval_steps_per_second": 27.441,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 3.00280499458313,
"learning_rate": 4.25e-05,
"loss": 0.3316,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8571428571428571,
"eval_f1": 0.81524926686217,
"eval_loss": 0.3294452726840973,
"eval_precision": 0.8462682339611953,
"eval_recall": 0.7964175304600837,
"eval_runtime": 1.7904,
"eval_samples_per_second": 222.854,
"eval_steps_per_second": 27.927,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 3.0830881595611572,
"learning_rate": 4e-05,
"loss": 0.2909,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8499551039516197,
"eval_loss": 0.30187976360321045,
"eval_precision": 0.8547473867595818,
"eval_recall": 0.8456082924168031,
"eval_runtime": 1.8,
"eval_samples_per_second": 221.672,
"eval_steps_per_second": 27.778,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 2.5169291496276855,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.2584,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.842789598108747,
"eval_loss": 0.3023463487625122,
"eval_precision": 0.842789598108747,
"eval_recall": 0.842789598108747,
"eval_runtime": 1.7982,
"eval_samples_per_second": 221.888,
"eval_steps_per_second": 27.805,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 1.894607424736023,
"learning_rate": 3.5e-05,
"loss": 0.237,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8375505157126486,
"eval_loss": 0.30203157663345337,
"eval_precision": 0.8359243697478991,
"eval_recall": 0.8392434988179669,
"eval_runtime": 1.7962,
"eval_samples_per_second": 222.138,
"eval_steps_per_second": 27.837,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.33639559149742126,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2186,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8721804511278195,
"eval_f1": 0.8491157824491158,
"eval_loss": 0.2988818287849426,
"eval_precision": 0.8424908424908425,
"eval_recall": 0.8570649208947081,
"eval_runtime": 1.8005,
"eval_samples_per_second": 221.607,
"eval_steps_per_second": 27.77,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 1.405568242073059,
"learning_rate": 3e-05,
"loss": 0.2108,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8614765038536611,
"eval_loss": 0.29612118005752563,
"eval_precision": 0.8686536646744258,
"eval_recall": 0.8552009456264775,
"eval_runtime": 1.8282,
"eval_samples_per_second": 218.244,
"eval_steps_per_second": 27.349,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 6.2904157638549805,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.1898,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8480717680029244,
"eval_loss": 0.30133265256881714,
"eval_precision": 0.8498775260257195,
"eval_recall": 0.8463356973995272,
"eval_runtime": 1.7951,
"eval_samples_per_second": 222.272,
"eval_steps_per_second": 27.854,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 3.820188522338867,
"learning_rate": 2.5e-05,
"loss": 0.1894,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8456742372671576,
"eval_loss": 0.32312536239624023,
"eval_precision": 0.8536697247706422,
"eval_recall": 0.8388343335151845,
"eval_runtime": 1.8085,
"eval_samples_per_second": 220.63,
"eval_steps_per_second": 27.648,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 1.6913771629333496,
"learning_rate": 2.25e-05,
"loss": 0.1817,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8514869535493182,
"eval_loss": 0.301199346780777,
"eval_precision": 0.8523821128305106,
"eval_recall": 0.8506092016730314,
"eval_runtime": 1.8217,
"eval_samples_per_second": 219.025,
"eval_steps_per_second": 27.447,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 5.912967681884766,
"learning_rate": 2e-05,
"loss": 0.1723,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8359175094431583,
"eval_loss": 0.29794374108314514,
"eval_precision": 0.8376607470912432,
"eval_recall": 0.8342425895617385,
"eval_runtime": 1.7955,
"eval_samples_per_second": 222.218,
"eval_steps_per_second": 27.847,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 0.4766283929347992,
"learning_rate": 1.75e-05,
"loss": 0.1547,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8411818738518064,
"eval_loss": 0.2937452793121338,
"eval_precision": 0.8448542607834644,
"eval_recall": 0.8377886888525186,
"eval_runtime": 1.8242,
"eval_samples_per_second": 218.73,
"eval_steps_per_second": 27.41,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 4.687190532684326,
"learning_rate": 1.5e-05,
"loss": 0.1569,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8485547445255475,
"eval_loss": 0.3064776360988617,
"eval_precision": 0.8383458646616542,
"eval_recall": 0.8627932351336607,
"eval_runtime": 1.7921,
"eval_samples_per_second": 222.641,
"eval_steps_per_second": 27.9,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 4.828568458557129,
"learning_rate": 1.25e-05,
"loss": 0.1442,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8529524583135901,
"eval_loss": 0.2884095013141632,
"eval_precision": 0.8504480286738352,
"eval_recall": 0.8556101109292599,
"eval_runtime": 1.8008,
"eval_samples_per_second": 221.563,
"eval_steps_per_second": 27.765,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 3.8095035552978516,
"learning_rate": 1e-05,
"loss": 0.1435,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8796992481203008,
"eval_f1": 0.8563025210084034,
"eval_loss": 0.30156683921813965,
"eval_precision": 0.8529936381473334,
"eval_recall": 0.8598836152027641,
"eval_runtime": 1.8479,
"eval_samples_per_second": 215.919,
"eval_steps_per_second": 27.058,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 0.34168025851249695,
"learning_rate": 7.5e-06,
"loss": 0.1378,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8556962025316456,
"eval_loss": 0.31144392490386963,
"eval_precision": 0.8477571494464944,
"eval_recall": 0.8656119294417166,
"eval_runtime": 1.8294,
"eval_samples_per_second": 218.106,
"eval_steps_per_second": 27.332,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 0.2586296498775482,
"learning_rate": 5e-06,
"loss": 0.1377,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8550328105883662,
"eval_loss": 0.3096340000629425,
"eval_precision": 0.8482905982905984,
"eval_recall": 0.8631114748136025,
"eval_runtime": 1.8332,
"eval_samples_per_second": 217.658,
"eval_steps_per_second": 27.275,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 0.0663389191031456,
"learning_rate": 2.5e-06,
"loss": 0.1307,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8796992481203008,
"eval_f1": 0.8556004584112431,
"eval_loss": 0.30646833777427673,
"eval_precision": 0.8538865546218487,
"eval_recall": 0.85738316057465,
"eval_runtime": 1.8307,
"eval_samples_per_second": 217.954,
"eval_steps_per_second": 27.313,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 3.8260014057159424,
"learning_rate": 0.0,
"loss": 0.126,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8656072933585827,
"eval_loss": 0.30540019273757935,
"eval_precision": 0.8614399005740664,
"eval_recall": 0.8702036733951628,
"eval_runtime": 1.8294,
"eval_samples_per_second": 218.099,
"eval_steps_per_second": 27.331,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 8444128359504000.0,
"train_loss": 0.21972156196344095,
"train_runtime": 635.9425,
"train_samples_per_second": 114.413,
"train_steps_per_second": 3.837
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8444128359504000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}