wav2vec2-xls-r-300m-bp1-es_eu / trainer_state.json
mpenagar's picture
Initial release
177a846
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.999854956849663,
"eval_steps": 4000,
"global_step": 124098,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"learning_rate": 9.756372971128695e-05,
"loss": 0.7054,
"step": 4000
},
{
"epoch": 0.19,
"eval_cer": 0.02274243901983751,
"eval_loss": 0.10111288726329803,
"eval_runtime": 66.987,
"eval_samples_per_second": 61.131,
"eval_steps_per_second": 3.822,
"eval_wer": 0.08709714849302352,
"step": 4000
},
{
"epoch": 0.39,
"learning_rate": 9.431509853937514e-05,
"loss": 0.0856,
"step": 8000
},
{
"epoch": 0.39,
"eval_cer": 0.020731155298169313,
"eval_loss": 0.09945787489414215,
"eval_runtime": 67.6612,
"eval_samples_per_second": 60.522,
"eval_steps_per_second": 3.784,
"eval_wer": 0.07468163490267746,
"step": 8000
},
{
"epoch": 0.58,
"learning_rate": 9.106727972834653e-05,
"loss": 0.075,
"step": 12000
},
{
"epoch": 0.58,
"eval_cer": 0.018470211068126987,
"eval_loss": 0.08679623156785965,
"eval_runtime": 72.7557,
"eval_samples_per_second": 56.284,
"eval_steps_per_second": 3.519,
"eval_wer": 0.06474632320946828,
"step": 12000
},
{
"epoch": 0.77,
"learning_rate": 8.781864855643472e-05,
"loss": 0.0694,
"step": 16000
},
{
"epoch": 0.77,
"eval_cer": 0.01828354900579119,
"eval_loss": 0.08532032370567322,
"eval_runtime": 71.0749,
"eval_samples_per_second": 57.615,
"eval_steps_per_second": 3.602,
"eval_wer": 0.06190351869579091,
"step": 16000
},
{
"epoch": 0.97,
"learning_rate": 8.45708297454061e-05,
"loss": 0.0658,
"step": 20000
},
{
"epoch": 0.97,
"eval_cer": 0.017056245945933333,
"eval_loss": 0.0778348445892334,
"eval_runtime": 68.8966,
"eval_samples_per_second": 59.437,
"eval_steps_per_second": 3.716,
"eval_wer": 0.057262205204072755,
"step": 20000
},
{
"epoch": 1.16,
"learning_rate": 8.13230109343775e-05,
"loss": 0.0589,
"step": 24000
},
{
"epoch": 1.16,
"eval_cer": 0.016645589408794584,
"eval_loss": 0.08211962133646011,
"eval_runtime": 71.5048,
"eval_samples_per_second": 57.269,
"eval_steps_per_second": 3.58,
"eval_wer": 0.05463696226031967,
"step": 24000
},
{
"epoch": 1.35,
"learning_rate": 7.807437976246568e-05,
"loss": 0.0572,
"step": 28000
},
{
"epoch": 1.35,
"eval_cer": 0.0169955807756742,
"eval_loss": 0.08269500732421875,
"eval_runtime": 64.9267,
"eval_samples_per_second": 63.071,
"eval_steps_per_second": 3.943,
"eval_wer": 0.05579729063324921,
"step": 28000
},
{
"epoch": 1.55,
"learning_rate": 7.482656095143708e-05,
"loss": 0.0551,
"step": 32000
},
{
"epoch": 1.55,
"eval_cer": 0.016853250953143155,
"eval_loss": 0.08304612338542938,
"eval_runtime": 67.5544,
"eval_samples_per_second": 60.618,
"eval_steps_per_second": 3.79,
"eval_wer": 0.05333159284077394,
"step": 32000
},
{
"epoch": 1.74,
"learning_rate": 7.157874214040847e-05,
"loss": 0.054,
"step": 36000
},
{
"epoch": 1.74,
"eval_cer": 0.016157934770942316,
"eval_loss": 0.0788031816482544,
"eval_runtime": 68.7321,
"eval_samples_per_second": 59.579,
"eval_steps_per_second": 3.725,
"eval_wer": 0.05122849766483915,
"step": 36000
},
{
"epoch": 1.93,
"learning_rate": 6.832929860761345e-05,
"loss": 0.0524,
"step": 40000
},
{
"epoch": 1.93,
"eval_cer": 0.015562949447246968,
"eval_loss": 0.07834739238023758,
"eval_runtime": 66.4802,
"eval_samples_per_second": 61.597,
"eval_steps_per_second": 3.851,
"eval_wer": 0.04893684912830331,
"step": 40000
},
{
"epoch": 2.13,
"learning_rate": 6.508147979658484e-05,
"loss": 0.048,
"step": 44000
},
{
"epoch": 2.13,
"eval_cer": 0.016024938051528062,
"eval_loss": 0.08605939149856567,
"eval_runtime": 68.4968,
"eval_samples_per_second": 59.784,
"eval_steps_per_second": 3.737,
"eval_wer": 0.04918341890755084,
"step": 44000
},
{
"epoch": 2.32,
"learning_rate": 6.183284862467303e-05,
"loss": 0.046,
"step": 48000
},
{
"epoch": 2.32,
"eval_cer": 0.015420619624715923,
"eval_loss": 0.07625599950551987,
"eval_runtime": 73.2954,
"eval_samples_per_second": 55.87,
"eval_steps_per_second": 3.493,
"eval_wer": 0.049400980477475126,
"step": 48000
},
{
"epoch": 2.51,
"learning_rate": 5.8584217452761215e-05,
"loss": 0.0456,
"step": 52000
},
{
"epoch": 2.51,
"eval_cer": 0.015303955835756052,
"eval_loss": 0.08352649956941605,
"eval_runtime": 71.5001,
"eval_samples_per_second": 57.273,
"eval_steps_per_second": 3.58,
"eval_wer": 0.047094827836277664,
"step": 52000
},
{
"epoch": 2.71,
"learning_rate": 5.53363986417326e-05,
"loss": 0.0439,
"step": 56000
},
{
"epoch": 2.71,
"eval_cer": 0.015152292910108217,
"eval_loss": 0.07897598296403885,
"eval_runtime": 71.5932,
"eval_samples_per_second": 57.198,
"eval_steps_per_second": 3.576,
"eval_wer": 0.04686276216169176,
"step": 56000
},
{
"epoch": 2.9,
"learning_rate": 5.2088579830704e-05,
"loss": 0.0436,
"step": 60000
},
{
"epoch": 2.9,
"eval_cer": 0.0155349501378966,
"eval_loss": 0.08321597427129745,
"eval_runtime": 69.796,
"eval_samples_per_second": 58.671,
"eval_steps_per_second": 3.668,
"eval_wer": 0.047196356568909,
"step": 60000
},
{
"epoch": 3.09,
"learning_rate": 4.8840761019675384e-05,
"loss": 0.0406,
"step": 64000
},
{
"epoch": 3.09,
"eval_cer": 0.014837300679916562,
"eval_loss": 0.08103086799383163,
"eval_runtime": 69.1811,
"eval_samples_per_second": 59.192,
"eval_steps_per_second": 3.7,
"eval_wer": 0.04416499869463058,
"step": 64000
},
{
"epoch": 3.29,
"learning_rate": 4.559212984776357e-05,
"loss": 0.0386,
"step": 68000
},
{
"epoch": 3.29,
"eval_cer": 0.014624972584009594,
"eval_loss": 0.08100830018520355,
"eval_runtime": 72.8594,
"eval_samples_per_second": 56.204,
"eval_steps_per_second": 3.514,
"eval_wer": 0.043628346822150665,
"step": 68000
},
{
"epoch": 3.48,
"learning_rate": 4.234349867585176e-05,
"loss": 0.038,
"step": 72000
},
{
"epoch": 3.48,
"eval_cer": 0.014267981389792385,
"eval_loss": 0.07782719284296036,
"eval_runtime": 73.706,
"eval_samples_per_second": 55.559,
"eval_steps_per_second": 3.473,
"eval_wer": 0.04301917442636266,
"step": 72000
},
{
"epoch": 3.67,
"learning_rate": 3.909567986482315e-05,
"loss": 0.0373,
"step": 76000
},
{
"epoch": 3.67,
"eval_cer": 0.014431310694336206,
"eval_loss": 0.07849407941102982,
"eval_runtime": 74.4035,
"eval_samples_per_second": 55.038,
"eval_steps_per_second": 3.441,
"eval_wer": 0.042975662112377806,
"step": 76000
},
{
"epoch": 3.87,
"learning_rate": 3.584786105379454e-05,
"loss": 0.0363,
"step": 80000
},
{
"epoch": 3.87,
"eval_cer": 0.014375312075635468,
"eval_loss": 0.07884030044078827,
"eval_runtime": 68.8676,
"eval_samples_per_second": 59.462,
"eval_steps_per_second": 3.717,
"eval_wer": 0.042119919937342265,
"step": 80000
},
{
"epoch": 4.06,
"learning_rate": 3.259922988188273e-05,
"loss": 0.0348,
"step": 84000
},
{
"epoch": 4.06,
"eval_cer": 0.01435431259362269,
"eval_loss": 0.08232194930315018,
"eval_runtime": 67.1145,
"eval_samples_per_second": 61.015,
"eval_steps_per_second": 3.814,
"eval_wer": 0.04232297740260494,
"step": 84000
},
{
"epoch": 4.25,
"learning_rate": 2.935059870997092e-05,
"loss": 0.0323,
"step": 88000
},
{
"epoch": 4.25,
"eval_cer": 0.014316980181155532,
"eval_loss": 0.08194055408239365,
"eval_runtime": 76.1231,
"eval_samples_per_second": 53.794,
"eval_steps_per_second": 3.363,
"eval_wer": 0.04068401357584196,
"step": 88000
},
{
"epoch": 4.45,
"learning_rate": 2.610277989894231e-05,
"loss": 0.0319,
"step": 92000
},
{
"epoch": 4.45,
"eval_cer": 0.014167650531286895,
"eval_loss": 0.08085062354803085,
"eval_runtime": 74.2762,
"eval_samples_per_second": 55.132,
"eval_steps_per_second": 3.447,
"eval_wer": 0.041032112087720826,
"step": 92000
},
{
"epoch": 4.64,
"learning_rate": 2.2854148727030496e-05,
"loss": 0.0314,
"step": 96000
},
{
"epoch": 4.64,
"eval_cer": 0.01383399209486166,
"eval_loss": 0.08210451155900955,
"eval_runtime": 73.1195,
"eval_samples_per_second": 56.004,
"eval_steps_per_second": 3.501,
"eval_wer": 0.040045832970730715,
"step": 96000
},
{
"epoch": 4.83,
"learning_rate": 1.9606329916001885e-05,
"loss": 0.0306,
"step": 100000
},
{
"epoch": 4.83,
"eval_cer": 0.013700995375447406,
"eval_loss": 0.08130906522274017,
"eval_runtime": 70.4204,
"eval_samples_per_second": 58.151,
"eval_steps_per_second": 3.635,
"eval_wer": 0.03885649638847794,
"step": 100000
},
{
"epoch": 5.03,
"learning_rate": 1.6358511104973274e-05,
"loss": 0.0295,
"step": 104000
},
{
"epoch": 5.03,
"eval_cer": 0.013115343154868847,
"eval_loss": 0.0819702297449112,
"eval_runtime": 67.1904,
"eval_samples_per_second": 60.946,
"eval_steps_per_second": 3.81,
"eval_wer": 0.037710672120210016,
"step": 104000
},
{
"epoch": 5.22,
"learning_rate": 1.3109879933061465e-05,
"loss": 0.0275,
"step": 108000
},
{
"epoch": 5.22,
"eval_cer": 0.013700995375447406,
"eval_loss": 0.08659966289997101,
"eval_runtime": 75.5877,
"eval_samples_per_second": 54.176,
"eval_steps_per_second": 3.387,
"eval_wer": 0.037826704957502975,
"step": 108000
},
{
"epoch": 5.41,
"learning_rate": 9.862061122032852e-06,
"loss": 0.0267,
"step": 112000
},
{
"epoch": 5.41,
"eval_cer": 0.013351004008567788,
"eval_loss": 0.08306384831666946,
"eval_runtime": 71.1622,
"eval_samples_per_second": 57.545,
"eval_steps_per_second": 3.597,
"eval_wer": 0.03755112696893221,
"step": 112000
},
{
"epoch": 5.61,
"learning_rate": 6.6134299501210415e-06,
"loss": 0.0264,
"step": 116000
},
{
"epoch": 5.61,
"eval_cer": 0.013194674531361559,
"eval_loss": 0.08454854041337967,
"eval_runtime": 69.2173,
"eval_samples_per_second": 59.162,
"eval_steps_per_second": 3.698,
"eval_wer": 0.03691294636382096,
"step": 116000
},
{
"epoch": 5.8,
"learning_rate": 3.364798778209232e-06,
"loss": 0.0258,
"step": 120000
},
{
"epoch": 5.8,
"eval_cer": 0.013325337974996616,
"eval_loss": 0.08589179813861847,
"eval_runtime": 73.5677,
"eval_samples_per_second": 55.663,
"eval_steps_per_second": 3.48,
"eval_wer": 0.037028979201113914,
"step": 120000
},
{
"epoch": 6.0,
"learning_rate": 1.1616760629742156e-07,
"loss": 0.0254,
"step": 124000
},
{
"epoch": 6.0,
"eval_cer": 0.013243673322724706,
"eval_loss": 0.0846036821603775,
"eval_runtime": 67.6027,
"eval_samples_per_second": 60.575,
"eval_steps_per_second": 3.787,
"eval_wer": 0.03666637658457343,
"step": 124000
},
{
"epoch": 6.0,
"step": 124098,
"total_flos": 8.637354948645209e+20,
"train_loss": 0.0650978993577199,
"train_runtime": 137826.0599,
"train_samples_per_second": 28.813,
"train_steps_per_second": 0.9
}
],
"logging_steps": 4000,
"max_steps": 124098,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"total_flos": 8.637354948645209e+20,
"trial_name": null,
"trial_params": null
}