sentiment-pt-pl10-2 / trainer_state.json
apwic's picture
End of training
34c0a60 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.458889961242676,
"learning_rate": 4.75e-05,
"loss": 0.5533,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7268170426065163,
"eval_f1": 0.6326680574676724,
"eval_loss": 0.5133728384971619,
"eval_precision": 0.6605799373040753,
"eval_recall": 0.6242044008001455,
"eval_runtime": 1.774,
"eval_samples_per_second": 224.915,
"eval_steps_per_second": 28.185,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.8158535957336426,
"learning_rate": 4.5e-05,
"loss": 0.4779,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7418546365914787,
"eval_f1": 0.7122401394791937,
"eval_loss": 0.4949621260166168,
"eval_precision": 0.7053803339517626,
"eval_recall": 0.734860883797054,
"eval_runtime": 1.7746,
"eval_samples_per_second": 224.844,
"eval_steps_per_second": 28.176,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 3.156679630279541,
"learning_rate": 4.25e-05,
"loss": 0.4097,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8245614035087719,
"eval_f1": 0.7664715719063545,
"eval_loss": 0.3772188425064087,
"eval_precision": 0.8092877840475827,
"eval_recall": 0.7458628841607565,
"eval_runtime": 1.8118,
"eval_samples_per_second": 220.228,
"eval_steps_per_second": 27.597,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 3.239713191986084,
"learning_rate": 4e-05,
"loss": 0.3451,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.8169941409717701,
"eval_loss": 0.3511227071285248,
"eval_precision": 0.8104735988883742,
"eval_recall": 0.8250591016548463,
"eval_runtime": 1.8097,
"eval_samples_per_second": 220.479,
"eval_steps_per_second": 27.629,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 2.155226707458496,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.2959,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8255172205802521,
"eval_loss": 0.32013869285583496,
"eval_precision": 0.8239495798319327,
"eval_recall": 0.8271503909801782,
"eval_runtime": 1.81,
"eval_samples_per_second": 220.445,
"eval_steps_per_second": 27.625,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.97943115234375,
"learning_rate": 3.5e-05,
"loss": 0.2727,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8447157518450185,
"eval_loss": 0.3176342844963074,
"eval_precision": 0.8325401217487549,
"eval_recall": 0.864248045099109,
"eval_runtime": 1.8128,
"eval_samples_per_second": 220.097,
"eval_steps_per_second": 27.581,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 1.0954539775848389,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2595,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8524146298159436,
"eval_loss": 0.2958522439002991,
"eval_precision": 0.8451250578971746,
"eval_recall": 0.8613384251682124,
"eval_runtime": 1.8171,
"eval_samples_per_second": 219.582,
"eval_steps_per_second": 27.516,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 6.516312122344971,
"learning_rate": 3e-05,
"loss": 0.2409,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8897243107769424,
"eval_f1": 0.8649122807017544,
"eval_loss": 0.28329744935035706,
"eval_precision": 0.8710116366366366,
"eval_recall": 0.8594744498999818,
"eval_runtime": 1.8171,
"eval_samples_per_second": 219.577,
"eval_steps_per_second": 27.516,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 6.709987163543701,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2298,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.850729517396184,
"eval_loss": 0.2893889546394348,
"eval_precision": 0.8535087719298247,
"eval_recall": 0.8481087470449173,
"eval_runtime": 1.8128,
"eval_samples_per_second": 220.099,
"eval_steps_per_second": 27.581,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 4.345912933349609,
"learning_rate": 2.5e-05,
"loss": 0.2221,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8614765038536611,
"eval_loss": 0.2884393632411957,
"eval_precision": 0.8686536646744258,
"eval_recall": 0.8552009456264775,
"eval_runtime": 1.8154,
"eval_samples_per_second": 219.786,
"eval_steps_per_second": 27.542,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 2.3872387409210205,
"learning_rate": 2.25e-05,
"loss": 0.1986,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8847117794486216,
"eval_f1": 0.8587719298245614,
"eval_loss": 0.2855367362499237,
"eval_precision": 0.864771021021021,
"eval_recall": 0.8534278959810875,
"eval_runtime": 1.8165,
"eval_samples_per_second": 219.656,
"eval_steps_per_second": 27.526,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 5.007177829742432,
"learning_rate": 2e-05,
"loss": 0.1964,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.8521068445832446,
"eval_loss": 0.29210031032562256,
"eval_precision": 0.8693800752624282,
"eval_recall": 0.8391525731951264,
"eval_runtime": 1.8152,
"eval_samples_per_second": 219.815,
"eval_steps_per_second": 27.546,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.4582099914550781,
"learning_rate": 1.75e-05,
"loss": 0.1783,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8897243107769424,
"eval_f1": 0.8649122807017544,
"eval_loss": 0.3103856146335602,
"eval_precision": 0.8710116366366366,
"eval_recall": 0.8594744498999818,
"eval_runtime": 1.8138,
"eval_samples_per_second": 219.982,
"eval_steps_per_second": 27.567,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 6.738508701324463,
"learning_rate": 1.5e-05,
"loss": 0.1788,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8897243107769424,
"eval_f1": 0.8689068100358424,
"eval_loss": 0.3015482425689697,
"eval_precision": 0.863953693884765,
"eval_recall": 0.8744771776686671,
"eval_runtime": 1.8152,
"eval_samples_per_second": 219.806,
"eval_steps_per_second": 27.545,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 2.6257522106170654,
"learning_rate": 1.25e-05,
"loss": 0.172,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8847117794486216,
"eval_f1": 0.8595070422535211,
"eval_loss": 0.3011764883995056,
"eval_precision": 0.8633733523114054,
"eval_recall": 0.8559283506092017,
"eval_runtime": 1.8131,
"eval_samples_per_second": 220.062,
"eval_steps_per_second": 27.577,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 4.563363552093506,
"learning_rate": 1e-05,
"loss": 0.1563,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8897243107769424,
"eval_f1": 0.8695225637671682,
"eval_loss": 0.3159307837486267,
"eval_precision": 0.8631532846715328,
"eval_recall": 0.8769776322967813,
"eval_runtime": 1.8172,
"eval_samples_per_second": 219.574,
"eval_steps_per_second": 27.516,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 1.206107258796692,
"learning_rate": 7.5e-06,
"loss": 0.1512,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8847117794486216,
"eval_f1": 0.8572517421602788,
"eval_loss": 0.32489535212516785,
"eval_precision": 0.8679426449878376,
"eval_recall": 0.8484269867248591,
"eval_runtime": 1.8131,
"eval_samples_per_second": 220.063,
"eval_steps_per_second": 27.577,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 4.8656744956970215,
"learning_rate": 5e-06,
"loss": 0.151,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.855319904024935,
"eval_loss": 0.3245084285736084,
"eval_precision": 0.862378106322743,
"eval_recall": 0.8491543917075832,
"eval_runtime": 1.8159,
"eval_samples_per_second": 219.726,
"eval_steps_per_second": 27.535,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 3.600020170211792,
"learning_rate": 2.5e-06,
"loss": 0.1461,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8614765038536611,
"eval_loss": 0.328171044588089,
"eval_precision": 0.8686536646744258,
"eval_recall": 0.8552009456264775,
"eval_runtime": 1.8138,
"eval_samples_per_second": 219.976,
"eval_steps_per_second": 27.566,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 3.394913673400879,
"learning_rate": 0.0,
"loss": 0.1555,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8622036668943447,
"eval_loss": 0.32483023405075073,
"eval_precision": 0.8671602787456446,
"eval_recall": 0.8577014002545917,
"eval_runtime": 1.8125,
"eval_samples_per_second": 220.138,
"eval_steps_per_second": 27.586,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 8444128359504000.0,
"train_loss": 0.2495564101172275,
"train_runtime": 624.4298,
"train_samples_per_second": 116.522,
"train_steps_per_second": 3.908
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8444128359504000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}