wav2vec2-xls-r-1b-hebrew / trainer_state.json
imvladikon's picture
End of training
e8f3ff1
raw
history blame
15.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.99970457902511,
"global_step": 16920,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.47,
"learning_rate": 0.00029775,
"loss": 3.3587,
"step": 400
},
{
"epoch": 0.47,
"eval_loss": 1.1883399486541748,
"eval_runtime": 298.8414,
"eval_samples_per_second": 16.986,
"eval_steps_per_second": 2.831,
"eval_wer": 0.8392195865162833,
"step": 400
},
{
"epoch": 0.95,
"learning_rate": 0.00029279055690072635,
"loss": 1.8377,
"step": 800
},
{
"epoch": 0.95,
"eval_loss": 0.8830727338790894,
"eval_runtime": 295.1554,
"eval_samples_per_second": 17.198,
"eval_steps_per_second": 2.866,
"eval_wer": 0.6852318175085172,
"step": 800
},
{
"epoch": 1.42,
"learning_rate": 0.00028552663438256653,
"loss": 1.7118,
"step": 1200
},
{
"epoch": 1.42,
"eval_loss": 0.8031275868415833,
"eval_runtime": 296.1538,
"eval_samples_per_second": 17.14,
"eval_steps_per_second": 2.857,
"eval_wer": 0.6565588166832427,
"step": 1200
},
{
"epoch": 1.89,
"learning_rate": 0.00027826271186440676,
"loss": 1.6741,
"step": 1600
},
{
"epoch": 1.89,
"eval_loss": 0.7517648339271545,
"eval_runtime": 296.1537,
"eval_samples_per_second": 17.14,
"eval_steps_per_second": 2.857,
"eval_wer": 0.6104492456144063,
"step": 1600
},
{
"epoch": 2.36,
"learning_rate": 0.00027099878934624694,
"loss": 1.6163,
"step": 2000
},
{
"epoch": 2.36,
"eval_loss": 0.6887586712837219,
"eval_runtime": 298.4301,
"eval_samples_per_second": 17.009,
"eval_steps_per_second": 2.835,
"eval_wer": 0.5590706138773092,
"step": 2000
},
{
"epoch": 2.84,
"learning_rate": 0.0002637348668280871,
"loss": 1.5782,
"step": 2400
},
{
"epoch": 2.84,
"eval_loss": 0.6580386161804199,
"eval_runtime": 296.628,
"eval_samples_per_second": 17.112,
"eval_steps_per_second": 2.852,
"eval_wer": 0.5164525890344288,
"step": 2400
},
{
"epoch": 3.31,
"learning_rate": 0.0002564709443099273,
"loss": 1.5548,
"step": 2800
},
{
"epoch": 3.31,
"eval_loss": 0.6505530476570129,
"eval_runtime": 300.5251,
"eval_samples_per_second": 16.89,
"eval_steps_per_second": 2.815,
"eval_wer": 0.5184417123389128,
"step": 2800
},
{
"epoch": 3.78,
"learning_rate": 0.00024920702179176754,
"loss": 1.5249,
"step": 3200
},
{
"epoch": 3.78,
"eval_loss": 0.6197642683982849,
"eval_runtime": 298.2224,
"eval_samples_per_second": 17.021,
"eval_steps_per_second": 2.837,
"eval_wer": 0.5028461391963095,
"step": 3200
},
{
"epoch": 4.26,
"learning_rate": 0.00024194309927360772,
"loss": 1.5078,
"step": 3600
},
{
"epoch": 4.26,
"eval_loss": 0.5992246866226196,
"eval_runtime": 298.0744,
"eval_samples_per_second": 17.029,
"eval_steps_per_second": 2.838,
"eval_wer": 0.4932179359671583,
"step": 3600
},
{
"epoch": 4.73,
"learning_rate": 0.0002346791767554479,
"loss": 1.4836,
"step": 4000
},
{
"epoch": 4.73,
"eval_loss": 0.5705241560935974,
"eval_runtime": 292.5306,
"eval_samples_per_second": 17.352,
"eval_steps_per_second": 2.892,
"eval_wer": 0.46513743995598533,
"step": 4000
},
{
"epoch": 5.2,
"learning_rate": 0.0002274152542372881,
"loss": 1.4505,
"step": 4400
},
{
"epoch": 5.2,
"eval_loss": 0.5488837957382202,
"eval_runtime": 300.7764,
"eval_samples_per_second": 16.876,
"eval_steps_per_second": 2.813,
"eval_wer": 0.4507691982140212,
"step": 4400
},
{
"epoch": 5.67,
"learning_rate": 0.00022015133171912832,
"loss": 1.4481,
"step": 4800
},
{
"epoch": 5.67,
"eval_loss": 0.5577110052108765,
"eval_runtime": 299.1357,
"eval_samples_per_second": 16.969,
"eval_steps_per_second": 2.828,
"eval_wer": 0.45624986774446114,
"step": 4800
},
{
"epoch": 6.15,
"learning_rate": 0.0002128874092009685,
"loss": 1.4136,
"step": 5200
},
{
"epoch": 6.15,
"eval_loss": 0.5452219247817993,
"eval_runtime": 298.6905,
"eval_samples_per_second": 16.994,
"eval_steps_per_second": 2.832,
"eval_wer": 0.4370992657172482,
"step": 5200
},
{
"epoch": 6.62,
"learning_rate": 0.00020564164648910412,
"loss": 1.3861,
"step": 5600
},
{
"epoch": 6.62,
"eval_loss": 0.510087788105011,
"eval_runtime": 298.7447,
"eval_samples_per_second": 16.991,
"eval_steps_per_second": 2.832,
"eval_wer": 0.40865903464037073,
"step": 5600
},
{
"epoch": 7.09,
"learning_rate": 0.0001983777239709443,
"loss": 1.3772,
"step": 6000
},
{
"epoch": 7.09,
"eval_loss": 0.49333110451698303,
"eval_runtime": 298.2699,
"eval_samples_per_second": 17.018,
"eval_steps_per_second": 2.836,
"eval_wer": 0.39513722834712317,
"step": 6000
},
{
"epoch": 7.56,
"learning_rate": 0.00019111380145278448,
"loss": 1.3478,
"step": 6400
},
{
"epoch": 7.56,
"eval_loss": 0.4849308133125305,
"eval_runtime": 299.279,
"eval_samples_per_second": 16.961,
"eval_steps_per_second": 2.827,
"eval_wer": 0.3921747042766151,
"step": 6400
},
{
"epoch": 8.04,
"learning_rate": 0.0001838498789346247,
"loss": 1.3394,
"step": 6800
},
{
"epoch": 8.04,
"eval_loss": 0.4805210828781128,
"eval_runtime": 297.1556,
"eval_samples_per_second": 17.082,
"eval_steps_per_second": 2.847,
"eval_wer": 0.3891698584336712,
"step": 6800
},
{
"epoch": 8.51,
"learning_rate": 0.00017658595641646487,
"loss": 1.3095,
"step": 7200
},
{
"epoch": 8.51,
"eval_loss": 0.48388615250587463,
"eval_runtime": 298.6068,
"eval_samples_per_second": 16.999,
"eval_steps_per_second": 2.833,
"eval_wer": 0.38335061472374465,
"step": 7200
},
{
"epoch": 8.98,
"learning_rate": 0.00016932203389830508,
"loss": 1.306,
"step": 7600
},
{
"epoch": 8.98,
"eval_loss": 0.461063027381897,
"eval_runtime": 296.7161,
"eval_samples_per_second": 17.107,
"eval_steps_per_second": 2.851,
"eval_wer": 0.35865586050743803,
"step": 7600
},
{
"epoch": 9.46,
"learning_rate": 0.00016205811138014526,
"loss": 1.2707,
"step": 8000
},
{
"epoch": 9.46,
"eval_loss": 0.4544948935508728,
"eval_runtime": 298.2639,
"eval_samples_per_second": 17.018,
"eval_steps_per_second": 2.836,
"eval_wer": 0.3730241022494022,
"step": 8000
},
{
"epoch": 9.93,
"learning_rate": 0.00015479418886198547,
"loss": 1.2626,
"step": 8400
},
{
"epoch": 9.93,
"eval_loss": 0.4515869617462158,
"eval_runtime": 299.4234,
"eval_samples_per_second": 16.953,
"eval_steps_per_second": 2.825,
"eval_wer": 0.35241339907315317,
"step": 8400
},
{
"epoch": 10.4,
"learning_rate": 0.00014753026634382565,
"loss": 1.2412,
"step": 8800
},
{
"epoch": 10.4,
"eval_loss": 0.4314204454421997,
"eval_runtime": 294.8808,
"eval_samples_per_second": 17.214,
"eval_steps_per_second": 2.869,
"eval_wer": 0.3310197431068413,
"step": 8800
},
{
"epoch": 10.87,
"learning_rate": 0.00014026634382566583,
"loss": 1.2456,
"step": 9200
},
{
"epoch": 10.87,
"eval_loss": 0.4400792419910431,
"eval_runtime": 296.6883,
"eval_samples_per_second": 17.109,
"eval_steps_per_second": 2.851,
"eval_wer": 0.3458958461180354,
"step": 9200
},
{
"epoch": 11.35,
"learning_rate": 0.00013302058111380144,
"loss": 1.2081,
"step": 9600
},
{
"epoch": 11.35,
"eval_loss": 0.4399240016937256,
"eval_runtime": 295.1566,
"eval_samples_per_second": 17.198,
"eval_steps_per_second": 2.866,
"eval_wer": 0.33563281630234676,
"step": 9600
},
{
"epoch": 11.82,
"learning_rate": 0.00012575665859564165,
"loss": 1.1998,
"step": 10000
},
{
"epoch": 11.82,
"eval_loss": 0.41947221755981445,
"eval_runtime": 296.8576,
"eval_samples_per_second": 17.099,
"eval_steps_per_second": 2.85,
"eval_wer": 0.3215396660812155,
"step": 10000
},
{
"epoch": 12.29,
"learning_rate": 0.00011851089588377723,
"loss": 1.1826,
"step": 10400
},
{
"epoch": 12.29,
"eval_loss": 0.4220864474773407,
"eval_runtime": 296.3734,
"eval_samples_per_second": 17.127,
"eval_steps_per_second": 2.855,
"eval_wer": 0.3177518674482087,
"step": 10400
},
{
"epoch": 12.77,
"learning_rate": 0.00011124697336561742,
"loss": 1.1573,
"step": 10800
},
{
"epoch": 12.77,
"eval_loss": 0.4097737967967987,
"eval_runtime": 297.6541,
"eval_samples_per_second": 17.053,
"eval_steps_per_second": 2.842,
"eval_wer": 0.3084199166261083,
"step": 10800
},
{
"epoch": 13.24,
"learning_rate": 0.00010398305084745762,
"loss": 1.1416,
"step": 11200
},
{
"epoch": 13.24,
"eval_loss": 0.4085627496242523,
"eval_runtime": 296.7019,
"eval_samples_per_second": 17.108,
"eval_steps_per_second": 2.851,
"eval_wer": 0.3119114628520642,
"step": 11200
},
{
"epoch": 13.71,
"learning_rate": 9.671912832929781e-05,
"loss": 1.1174,
"step": 11600
},
{
"epoch": 13.71,
"eval_loss": 0.3854358196258545,
"eval_runtime": 297.2728,
"eval_samples_per_second": 17.075,
"eval_steps_per_second": 2.846,
"eval_wer": 0.2910468290412003,
"step": 11600
},
{
"epoch": 14.18,
"learning_rate": 8.947336561743341e-05,
"loss": 1.1048,
"step": 12000
},
{
"epoch": 14.18,
"eval_loss": 0.38590070605278015,
"eval_runtime": 297.414,
"eval_samples_per_second": 17.067,
"eval_steps_per_second": 2.845,
"eval_wer": 0.282413187464291,
"step": 12000
},
{
"epoch": 14.66,
"learning_rate": 8.22094430992736e-05,
"loss": 1.0748,
"step": 12400
},
{
"epoch": 14.66,
"eval_loss": 0.3854171633720398,
"eval_runtime": 295.6597,
"eval_samples_per_second": 17.168,
"eval_steps_per_second": 2.861,
"eval_wer": 0.275705186533212,
"step": 12400
},
{
"epoch": 15.13,
"learning_rate": 7.494552058111379e-05,
"loss": 1.0697,
"step": 12800
},
{
"epoch": 15.13,
"eval_loss": 0.37401217222213745,
"eval_runtime": 297.4379,
"eval_samples_per_second": 17.066,
"eval_steps_per_second": 2.844,
"eval_wer": 0.2723829273969994,
"step": 12800
},
{
"epoch": 15.6,
"learning_rate": 6.7681598062954e-05,
"loss": 1.0477,
"step": 13200
},
{
"epoch": 15.6,
"eval_loss": 0.3693487048149109,
"eval_runtime": 298.4761,
"eval_samples_per_second": 17.006,
"eval_steps_per_second": 2.834,
"eval_wer": 0.2642994688617559,
"step": 13200
},
{
"epoch": 16.08,
"learning_rate": 6.0417675544794184e-05,
"loss": 1.0356,
"step": 13600
},
{
"epoch": 16.08,
"eval_loss": 0.3726678788661957,
"eval_runtime": 298.583,
"eval_samples_per_second": 17.0,
"eval_steps_per_second": 2.833,
"eval_wer": 0.2561102058954229,
"step": 13600
},
{
"epoch": 16.55,
"learning_rate": 5.315375302663438e-05,
"loss": 1.0083,
"step": 14000
},
{
"epoch": 16.55,
"eval_loss": 0.36522331833839417,
"eval_runtime": 301.7885,
"eval_samples_per_second": 16.82,
"eval_steps_per_second": 2.803,
"eval_wer": 0.25007935332331716,
"step": 14000
},
{
"epoch": 17.02,
"learning_rate": 4.5889830508474574e-05,
"loss": 1.0,
"step": 14400
},
{
"epoch": 17.02,
"eval_loss": 0.36411818861961365,
"eval_runtime": 297.5091,
"eval_samples_per_second": 17.062,
"eval_steps_per_second": 2.844,
"eval_wer": 0.24572021076242673,
"step": 14400
},
{
"epoch": 17.49,
"learning_rate": 3.862590799031477e-05,
"loss": 0.9779,
"step": 14800
},
{
"epoch": 17.49,
"eval_loss": 0.35678642988204956,
"eval_runtime": 295.5669,
"eval_samples_per_second": 17.174,
"eval_steps_per_second": 2.862,
"eval_wer": 0.24089552870474215,
"step": 14800
},
{
"epoch": 17.97,
"learning_rate": 3.1361985472154963e-05,
"loss": 0.9596,
"step": 15200
},
{
"epoch": 17.97,
"eval_loss": 0.3557918071746826,
"eval_runtime": 296.6965,
"eval_samples_per_second": 17.108,
"eval_steps_per_second": 2.851,
"eval_wer": 0.23759443045474746,
"step": 15200
},
{
"epoch": 18.44,
"learning_rate": 2.4098062953995155e-05,
"loss": 0.946,
"step": 15600
},
{
"epoch": 18.44,
"eval_loss": 0.35907429456710815,
"eval_runtime": 295.7242,
"eval_samples_per_second": 17.165,
"eval_steps_per_second": 2.861,
"eval_wer": 0.23114036015828343,
"step": 15600
},
{
"epoch": 18.91,
"learning_rate": 1.685230024213075e-05,
"loss": 0.9389,
"step": 16000
},
{
"epoch": 18.91,
"eval_loss": 0.3540380001068115,
"eval_runtime": 296.3149,
"eval_samples_per_second": 17.13,
"eval_steps_per_second": 2.855,
"eval_wer": 0.22828364051886493,
"step": 16000
},
{
"epoch": 19.39,
"learning_rate": 9.588377723970942e-06,
"loss": 0.9173,
"step": 16400
},
{
"epoch": 19.39,
"eval_loss": 0.35515815019607544,
"eval_runtime": 296.2267,
"eval_samples_per_second": 17.136,
"eval_steps_per_second": 2.856,
"eval_wer": 0.2265061260765601,
"step": 16400
},
{
"epoch": 19.86,
"learning_rate": 2.3244552058111378e-06,
"loss": 0.9122,
"step": 16800
},
{
"epoch": 19.86,
"eval_loss": 0.353456050157547,
"eval_runtime": 298.3571,
"eval_samples_per_second": 17.013,
"eval_steps_per_second": 2.836,
"eval_wer": 0.22502486404130606,
"step": 16800
},
{
"epoch": 20.0,
"step": 16920,
"total_flos": 1.9284410553255697e+20,
"train_loss": 1.3125287554224614,
"train_runtime": 67690.7896,
"train_samples_per_second": 6.0,
"train_steps_per_second": 0.25
}
],
"max_steps": 16920,
"num_train_epochs": 20,
"total_flos": 1.9284410553255697e+20,
"trial_name": null,
"trial_params": null
}