wav2vec2-base-960h-EMOPIA-10sec / trainer_state.json
kurosekurose's picture
End of training
a747fb9 verified
raw
history blame contribute delete
No virus
13.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 500,
"global_step": 24210,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 6.501698017120361,
"learning_rate": 9.66914498141264e-06,
"loss": 1.2014,
"step": 807
},
{
"epoch": 1.0,
"eval_accuracy": 0.3661971688270569,
"eval_loss": 1.183009147644043,
"eval_runtime": 5.7015,
"eval_samples_per_second": 12.453,
"eval_steps_per_second": 12.453,
"step": 807
},
{
"epoch": 2.0,
"grad_norm": 11.675583839416504,
"learning_rate": 9.335811648079307e-06,
"loss": 1.0915,
"step": 1614
},
{
"epoch": 2.0,
"eval_accuracy": 0.3239436745643616,
"eval_loss": 1.5119783878326416,
"eval_runtime": 5.7636,
"eval_samples_per_second": 12.319,
"eval_steps_per_second": 12.319,
"step": 1614
},
{
"epoch": 3.0,
"grad_norm": 16.97098731994629,
"learning_rate": 9.002478314745973e-06,
"loss": 1.1433,
"step": 2421
},
{
"epoch": 3.0,
"eval_accuracy": 0.4084506928920746,
"eval_loss": 1.5698989629745483,
"eval_runtime": 5.8045,
"eval_samples_per_second": 12.232,
"eval_steps_per_second": 12.232,
"step": 2421
},
{
"epoch": 4.0,
"grad_norm": 127.35404205322266,
"learning_rate": 8.669558033870302e-06,
"loss": 1.2819,
"step": 3228
},
{
"epoch": 4.0,
"eval_accuracy": 0.47887325286865234,
"eval_loss": 1.737151861190796,
"eval_runtime": 5.8167,
"eval_samples_per_second": 12.206,
"eval_steps_per_second": 12.206,
"step": 3228
},
{
"epoch": 5.0,
"grad_norm": 0.40140867233276367,
"learning_rate": 8.336224700536968e-06,
"loss": 1.2718,
"step": 4035
},
{
"epoch": 5.0,
"eval_accuracy": 0.4647887349128723,
"eval_loss": 2.216925859451294,
"eval_runtime": 5.8112,
"eval_samples_per_second": 12.218,
"eval_steps_per_second": 12.218,
"step": 4035
},
{
"epoch": 6.0,
"grad_norm": 30.236343383789062,
"learning_rate": 8.003304419661297e-06,
"loss": 1.4535,
"step": 4842
},
{
"epoch": 6.0,
"eval_accuracy": 0.577464759349823,
"eval_loss": 1.7295811176300049,
"eval_runtime": 5.8523,
"eval_samples_per_second": 12.132,
"eval_steps_per_second": 12.132,
"step": 4842
},
{
"epoch": 7.0,
"grad_norm": 0.2374914586544037,
"learning_rate": 7.670384138785627e-06,
"loss": 1.3433,
"step": 5649
},
{
"epoch": 7.0,
"eval_accuracy": 0.5492957830429077,
"eval_loss": 2.268446683883667,
"eval_runtime": 5.8095,
"eval_samples_per_second": 12.221,
"eval_steps_per_second": 12.221,
"step": 5649
},
{
"epoch": 8.0,
"grad_norm": 32.27313232421875,
"learning_rate": 7.3370508054522925e-06,
"loss": 1.4086,
"step": 6456
},
{
"epoch": 8.0,
"eval_accuracy": 0.6478873491287231,
"eval_loss": 1.8598684072494507,
"eval_runtime": 5.7903,
"eval_samples_per_second": 12.262,
"eval_steps_per_second": 12.262,
"step": 6456
},
{
"epoch": 9.0,
"grad_norm": 42.521121978759766,
"learning_rate": 7.003717472118959e-06,
"loss": 1.3923,
"step": 7263
},
{
"epoch": 9.0,
"eval_accuracy": 0.6197183132171631,
"eval_loss": 1.9419935941696167,
"eval_runtime": 5.8464,
"eval_samples_per_second": 12.144,
"eval_steps_per_second": 12.144,
"step": 7263
},
{
"epoch": 10.0,
"grad_norm": 0.37923476099967957,
"learning_rate": 6.670384138785626e-06,
"loss": 1.3353,
"step": 8070
},
{
"epoch": 10.0,
"eval_accuracy": 0.577464759349823,
"eval_loss": 2.2150189876556396,
"eval_runtime": 5.8548,
"eval_samples_per_second": 12.127,
"eval_steps_per_second": 12.127,
"step": 8070
},
{
"epoch": 11.0,
"grad_norm": 19.937904357910156,
"learning_rate": 6.337463857909954e-06,
"loss": 1.367,
"step": 8877
},
{
"epoch": 11.0,
"eval_accuracy": 0.6338028311729431,
"eval_loss": 1.9825525283813477,
"eval_runtime": 5.9069,
"eval_samples_per_second": 12.02,
"eval_steps_per_second": 12.02,
"step": 8877
},
{
"epoch": 12.0,
"grad_norm": 454.2120666503906,
"learning_rate": 6.004543577034284e-06,
"loss": 1.1848,
"step": 9684
},
{
"epoch": 12.0,
"eval_accuracy": 0.6478873491287231,
"eval_loss": 1.9545217752456665,
"eval_runtime": 5.8618,
"eval_samples_per_second": 12.112,
"eval_steps_per_second": 12.112,
"step": 9684
},
{
"epoch": 13.0,
"grad_norm": 0.09417306631803513,
"learning_rate": 5.671210243700951e-06,
"loss": 1.1355,
"step": 10491
},
{
"epoch": 13.0,
"eval_accuracy": 0.6619718074798584,
"eval_loss": 1.9863765239715576,
"eval_runtime": 5.8578,
"eval_samples_per_second": 12.12,
"eval_steps_per_second": 12.12,
"step": 10491
},
{
"epoch": 14.0,
"grad_norm": 33.132957458496094,
"learning_rate": 5.337876910367618e-06,
"loss": 1.1549,
"step": 11298
},
{
"epoch": 14.0,
"eval_accuracy": 0.6338028311729431,
"eval_loss": 1.9428231716156006,
"eval_runtime": 5.8665,
"eval_samples_per_second": 12.103,
"eval_steps_per_second": 12.103,
"step": 11298
},
{
"epoch": 15.0,
"grad_norm": 0.12686021625995636,
"learning_rate": 5.004543577034285e-06,
"loss": 1.0505,
"step": 12105
},
{
"epoch": 15.0,
"eval_accuracy": 0.6901408433914185,
"eval_loss": 1.9100552797317505,
"eval_runtime": 5.8853,
"eval_samples_per_second": 12.064,
"eval_steps_per_second": 12.064,
"step": 12105
},
{
"epoch": 16.0,
"grad_norm": 1043.1195068359375,
"learning_rate": 4.6712102437009505e-06,
"loss": 1.0442,
"step": 12912
},
{
"epoch": 16.0,
"eval_accuracy": 0.6478873491287231,
"eval_loss": 2.1706087589263916,
"eval_runtime": 5.8878,
"eval_samples_per_second": 12.059,
"eval_steps_per_second": 12.059,
"step": 12912
},
{
"epoch": 17.0,
"grad_norm": 26.500118255615234,
"learning_rate": 4.338289962825279e-06,
"loss": 0.9922,
"step": 13719
},
{
"epoch": 17.0,
"eval_accuracy": 0.6197183132171631,
"eval_loss": 2.462040901184082,
"eval_runtime": 5.8428,
"eval_samples_per_second": 12.152,
"eval_steps_per_second": 12.152,
"step": 13719
},
{
"epoch": 18.0,
"grad_norm": 162.029052734375,
"learning_rate": 4.004956629491946e-06,
"loss": 0.8698,
"step": 14526
},
{
"epoch": 18.0,
"eval_accuracy": 0.6619718074798584,
"eval_loss": 2.142850637435913,
"eval_runtime": 5.9312,
"eval_samples_per_second": 11.971,
"eval_steps_per_second": 11.971,
"step": 14526
},
{
"epoch": 19.0,
"grad_norm": 0.0350579209625721,
"learning_rate": 3.6716232961586124e-06,
"loss": 0.8202,
"step": 15333
},
{
"epoch": 19.0,
"eval_accuracy": 0.6197183132171631,
"eval_loss": 2.372546911239624,
"eval_runtime": 5.8453,
"eval_samples_per_second": 12.147,
"eval_steps_per_second": 12.147,
"step": 15333
},
{
"epoch": 20.0,
"grad_norm": 0.042548421770334244,
"learning_rate": 3.3387030152829415e-06,
"loss": 0.8612,
"step": 16140
},
{
"epoch": 20.0,
"eval_accuracy": 0.6619718074798584,
"eval_loss": 2.1631431579589844,
"eval_runtime": 5.8804,
"eval_samples_per_second": 12.074,
"eval_steps_per_second": 12.074,
"step": 16140
},
{
"epoch": 21.0,
"grad_norm": 0.09584546834230423,
"learning_rate": 3.0053696819496083e-06,
"loss": 0.8197,
"step": 16947
},
{
"epoch": 21.0,
"eval_accuracy": 0.6338028311729431,
"eval_loss": 2.393216848373413,
"eval_runtime": 5.8299,
"eval_samples_per_second": 12.179,
"eval_steps_per_second": 12.179,
"step": 16947
},
{
"epoch": 22.0,
"grad_norm": 0.0448361337184906,
"learning_rate": 2.672036348616274e-06,
"loss": 0.7858,
"step": 17754
},
{
"epoch": 22.0,
"eval_accuracy": 0.6478873491287231,
"eval_loss": 2.25321364402771,
"eval_runtime": 5.8111,
"eval_samples_per_second": 12.218,
"eval_steps_per_second": 12.218,
"step": 17754
},
{
"epoch": 23.0,
"grad_norm": 0.057099100202322006,
"learning_rate": 2.3391160677406034e-06,
"loss": 0.7717,
"step": 18561
},
{
"epoch": 23.0,
"eval_accuracy": 0.5633803009986877,
"eval_loss": 2.813150644302368,
"eval_runtime": 5.8476,
"eval_samples_per_second": 12.142,
"eval_steps_per_second": 12.142,
"step": 18561
},
{
"epoch": 24.0,
"grad_norm": 202.8040008544922,
"learning_rate": 2.00578273440727e-06,
"loss": 0.6282,
"step": 19368
},
{
"epoch": 24.0,
"eval_accuracy": 0.6197183132171631,
"eval_loss": 2.549309492111206,
"eval_runtime": 5.839,
"eval_samples_per_second": 12.16,
"eval_steps_per_second": 12.16,
"step": 19368
},
{
"epoch": 25.0,
"grad_norm": 0.06747495383024216,
"learning_rate": 1.6728624535315987e-06,
"loss": 0.7394,
"step": 20175
},
{
"epoch": 25.0,
"eval_accuracy": 0.6619718074798584,
"eval_loss": 2.3195266723632812,
"eval_runtime": 5.8894,
"eval_samples_per_second": 12.056,
"eval_steps_per_second": 12.056,
"step": 20175
},
{
"epoch": 26.0,
"grad_norm": 4.30875301361084,
"learning_rate": 1.3395291201982652e-06,
"loss": 0.5895,
"step": 20982
},
{
"epoch": 26.0,
"eval_accuracy": 0.6619718074798584,
"eval_loss": 2.43306565284729,
"eval_runtime": 5.8283,
"eval_samples_per_second": 12.182,
"eval_steps_per_second": 12.182,
"step": 20982
},
{
"epoch": 27.0,
"grad_norm": 0.05214543268084526,
"learning_rate": 1.006195786864932e-06,
"loss": 0.5854,
"step": 21789
},
{
"epoch": 27.0,
"eval_accuracy": 0.6760563254356384,
"eval_loss": 2.428130626678467,
"eval_runtime": 5.8154,
"eval_samples_per_second": 12.209,
"eval_steps_per_second": 12.209,
"step": 21789
},
{
"epoch": 28.0,
"grad_norm": 0.1047598198056221,
"learning_rate": 6.732755059892606e-07,
"loss": 0.6911,
"step": 22596
},
{
"epoch": 28.0,
"eval_accuracy": 0.6619718074798584,
"eval_loss": 2.499279737472534,
"eval_runtime": 5.8567,
"eval_samples_per_second": 12.123,
"eval_steps_per_second": 12.123,
"step": 22596
},
{
"epoch": 29.0,
"grad_norm": 92.8244857788086,
"learning_rate": 3.3994217265592734e-07,
"loss": 0.5502,
"step": 23403
},
{
"epoch": 29.0,
"eval_accuracy": 0.6338028311729431,
"eval_loss": 2.64582896232605,
"eval_runtime": 5.8325,
"eval_samples_per_second": 12.173,
"eval_steps_per_second": 12.173,
"step": 23403
},
{
"epoch": 30.0,
"grad_norm": 0.047546908259391785,
"learning_rate": 6.60883932259397e-09,
"loss": 0.584,
"step": 24210
},
{
"epoch": 30.0,
"eval_accuracy": 0.6338028311729431,
"eval_loss": 2.586568593978882,
"eval_runtime": 5.8563,
"eval_samples_per_second": 12.124,
"eval_steps_per_second": 12.124,
"step": 24210
},
{
"epoch": 30.0,
"step": 24210,
"total_flos": 2.2475408052256202e+18,
"train_loss": 1.0049422518175921,
"train_runtime": 3971.9322,
"train_samples_per_second": 6.095,
"train_steps_per_second": 6.095
}
],
"logging_steps": 12,
"max_steps": 24210,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.2475408052256202e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}