sentiment-pt-pl5-3 / trainer_state.json
apwic's picture
End of training
36757af verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.800751686096191,
"learning_rate": 4.75e-05,
"loss": 0.5455,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7543859649122807,
"eval_f1": 0.6639335808580858,
"eval_loss": 0.48765844106674194,
"eval_precision": 0.7053086419753087,
"eval_recall": 0.6512093107837789,
"eval_runtime": 1.7695,
"eval_samples_per_second": 225.487,
"eval_steps_per_second": 28.257,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.828364849090576,
"learning_rate": 4.5e-05,
"loss": 0.4356,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.8446115288220551,
"eval_f1": 0.8209674617878647,
"eval_loss": 0.35374370217323303,
"eval_precision": 0.810288627625787,
"eval_recall": 0.8375613747954174,
"eval_runtime": 1.7671,
"eval_samples_per_second": 225.791,
"eval_steps_per_second": 28.295,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 7.537293910980225,
"learning_rate": 4.25e-05,
"loss": 0.3468,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8072960267885891,
"eval_loss": 0.3415985107421875,
"eval_precision": 0.8325657894736842,
"eval_recall": 0.7910983815239134,
"eval_runtime": 1.7644,
"eval_samples_per_second": 226.134,
"eval_steps_per_second": 28.338,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 3.427431106567383,
"learning_rate": 4e-05,
"loss": 0.3049,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8180088078011953,
"eval_loss": 0.3125925660133362,
"eval_precision": 0.8323930726843348,
"eval_recall": 0.8071467539552646,
"eval_runtime": 1.7656,
"eval_samples_per_second": 225.985,
"eval_steps_per_second": 28.319,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 3.060295820236206,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.2673,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8315338681464504,
"eval_loss": 0.29185813665390015,
"eval_precision": 0.8299369747899159,
"eval_recall": 0.8331969448990726,
"eval_runtime": 1.7632,
"eval_samples_per_second": 226.299,
"eval_steps_per_second": 28.358,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.6104700565338135,
"learning_rate": 3.5e-05,
"loss": 0.2516,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8350734843845683,
"eval_loss": 0.2822675108909607,
"eval_precision": 0.8386812302741506,
"eval_recall": 0.8317421349336243,
"eval_runtime": 1.761,
"eval_samples_per_second": 226.577,
"eval_steps_per_second": 28.393,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.407837837934494,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2243,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.8622085718274466,
"eval_loss": 0.26883605122566223,
"eval_precision": 0.8530168716042322,
"eval_recall": 0.8741589379887251,
"eval_runtime": 1.7626,
"eval_samples_per_second": 226.37,
"eval_steps_per_second": 28.367,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 0.6773055791854858,
"learning_rate": 3e-05,
"loss": 0.2157,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8947368421052632,
"eval_f1": 0.8696646341463414,
"eval_loss": 0.2641172409057617,
"eval_precision": 0.8807130969146075,
"eval_recall": 0.8605200945626478,
"eval_runtime": 1.7614,
"eval_samples_per_second": 226.526,
"eval_steps_per_second": 28.387,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 3.9705936908721924,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2052,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8847117794486216,
"eval_f1": 0.8572517421602788,
"eval_loss": 0.26267343759536743,
"eval_precision": 0.8679426449878376,
"eval_recall": 0.8484269867248591,
"eval_runtime": 1.7598,
"eval_samples_per_second": 226.73,
"eval_steps_per_second": 28.412,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 13.646815299987793,
"learning_rate": 2.5e-05,
"loss": 0.1864,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8847117794486216,
"eval_f1": 0.8548402404302435,
"eval_loss": 0.288084477186203,
"eval_precision": 0.8737291514386338,
"eval_recall": 0.8409256228405164,
"eval_runtime": 1.7593,
"eval_samples_per_second": 226.79,
"eval_steps_per_second": 28.42,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 4.454102039337158,
"learning_rate": 2.25e-05,
"loss": 0.1928,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.867476104365797,
"eval_loss": 0.2784619927406311,
"eval_precision": 0.8592596863468634,
"eval_recall": 0.8777050372795053,
"eval_runtime": 1.7608,
"eval_samples_per_second": 226.608,
"eval_steps_per_second": 28.397,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 4.11415958404541,
"learning_rate": 2e-05,
"loss": 0.1804,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.899749373433584,
"eval_f1": 0.8758710801393728,
"eval_loss": 0.2506408393383026,
"eval_precision": 0.8870983228779925,
"eval_recall": 0.8665666484815421,
"eval_runtime": 1.7598,
"eval_samples_per_second": 226.734,
"eval_steps_per_second": 28.413,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.9822263717651367,
"learning_rate": 1.75e-05,
"loss": 0.1654,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.899749373433584,
"eval_f1": 0.8790689216221131,
"eval_loss": 0.26640743017196655,
"eval_precision": 0.8790689216221131,
"eval_recall": 0.8790689216221131,
"eval_runtime": 1.7621,
"eval_samples_per_second": 226.429,
"eval_steps_per_second": 28.375,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 4.247542381286621,
"learning_rate": 1.5e-05,
"loss": 0.1567,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8878351186601172,
"eval_loss": 0.2660583257675171,
"eval_precision": 0.879776516905975,
"eval_recall": 0.8976177486815784,
"eval_runtime": 1.7643,
"eval_samples_per_second": 226.146,
"eval_steps_per_second": 28.339,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 1.1535786390304565,
"learning_rate": 1.25e-05,
"loss": 0.1438,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.9097744360902256,
"eval_f1": 0.8917003438084323,
"eval_loss": 0.2614538073539734,
"eval_precision": 0.8898109243697478,
"eval_recall": 0.893662484088016,
"eval_runtime": 1.7625,
"eval_samples_per_second": 226.385,
"eval_steps_per_second": 28.369,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 0.5205868482589722,
"learning_rate": 1e-05,
"loss": 0.1472,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8856836962422341,
"eval_loss": 0.25552284717559814,
"eval_precision": 0.8838235294117647,
"eval_recall": 0.8876159301691217,
"eval_runtime": 1.7611,
"eval_samples_per_second": 226.564,
"eval_steps_per_second": 28.392,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 6.205023288726807,
"learning_rate": 7.5e-06,
"loss": 0.1394,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.899749373433584,
"eval_f1": 0.8790689216221131,
"eval_loss": 0.2647987902164459,
"eval_precision": 0.8790689216221131,
"eval_recall": 0.8790689216221131,
"eval_runtime": 1.7605,
"eval_samples_per_second": 226.638,
"eval_steps_per_second": 28.401,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 1.5985056161880493,
"learning_rate": 5e-06,
"loss": 0.1387,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8862394957983193,
"eval_loss": 0.26299694180488586,
"eval_precision": 0.8826476009275225,
"eval_recall": 0.8901163847972358,
"eval_runtime": 1.7607,
"eval_samples_per_second": 226.62,
"eval_steps_per_second": 28.399,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 1.7635153532028198,
"learning_rate": 2.5e-06,
"loss": 0.1378,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8845345436822225,
"eval_loss": 0.26893848180770874,
"eval_precision": 0.8865278628291489,
"eval_recall": 0.8826150209128933,
"eval_runtime": 1.7631,
"eval_samples_per_second": 226.305,
"eval_steps_per_second": 28.359,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 3.3988428115844727,
"learning_rate": 0.0,
"loss": 0.1365,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.9022556390977443,
"eval_f1": 0.8817957385392532,
"eval_loss": 0.2682853937149048,
"eval_precision": 0.8827677592299257,
"eval_recall": 0.8808419712675032,
"eval_runtime": 1.7608,
"eval_samples_per_second": 226.596,
"eval_steps_per_second": 28.396,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 8444128359504000.0,
"train_loss": 0.22609603365913766,
"train_runtime": 618.465,
"train_samples_per_second": 117.646,
"train_steps_per_second": 3.945
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8444128359504000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}