whisper-cy-small-augmented / trainer_state.json
Moreno La Quatra
End of training
a77881d
{
"best_metric": 23.061646012074995,
"best_model_checkpoint": "whisper-cy-small-augmented/checkpoint-5000",
"epoch": 30.48780487804878,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3,
"learning_rate": 4.7e-06,
"loss": 3.0617,
"step": 50
},
{
"epoch": 0.61,
"learning_rate": 9.7e-06,
"loss": 1.6123,
"step": 100
},
{
"epoch": 0.91,
"learning_rate": 1.47e-05,
"loss": 1.1238,
"step": 150
},
{
"epoch": 1.22,
"learning_rate": 1.97e-05,
"loss": 0.7931,
"step": 200
},
{
"epoch": 1.52,
"learning_rate": 2.47e-05,
"loss": 0.6475,
"step": 250
},
{
"epoch": 1.83,
"learning_rate": 2.97e-05,
"loss": 0.5611,
"step": 300
},
{
"epoch": 2.13,
"learning_rate": 3.4699999999999996e-05,
"loss": 0.4511,
"step": 350
},
{
"epoch": 2.44,
"learning_rate": 3.97e-05,
"loss": 0.3488,
"step": 400
},
{
"epoch": 2.74,
"learning_rate": 4.47e-05,
"loss": 0.3475,
"step": 450
},
{
"epoch": 3.05,
"learning_rate": 4.97e-05,
"loss": 0.3222,
"step": 500
},
{
"epoch": 3.35,
"learning_rate": 4.947777777777778e-05,
"loss": 0.1932,
"step": 550
},
{
"epoch": 3.66,
"learning_rate": 4.892222222222222e-05,
"loss": 0.2064,
"step": 600
},
{
"epoch": 3.96,
"learning_rate": 4.836666666666667e-05,
"loss": 0.2041,
"step": 650
},
{
"epoch": 4.27,
"learning_rate": 4.781111111111111e-05,
"loss": 0.1152,
"step": 700
},
{
"epoch": 4.57,
"learning_rate": 4.725555555555556e-05,
"loss": 0.1119,
"step": 750
},
{
"epoch": 4.88,
"learning_rate": 4.6700000000000003e-05,
"loss": 0.1057,
"step": 800
},
{
"epoch": 5.18,
"learning_rate": 4.614444444444445e-05,
"loss": 0.0789,
"step": 850
},
{
"epoch": 5.49,
"learning_rate": 4.558888888888889e-05,
"loss": 0.0651,
"step": 900
},
{
"epoch": 5.79,
"learning_rate": 4.5033333333333335e-05,
"loss": 0.0721,
"step": 950
},
{
"epoch": 6.1,
"learning_rate": 4.447777777777778e-05,
"loss": 0.0597,
"step": 1000
},
{
"epoch": 6.1,
"eval_loss": 0.46903195977211,
"eval_runtime": 1165.3555,
"eval_samples_per_second": 4.519,
"eval_steps_per_second": 0.142,
"eval_wer": 29.466555449634573,
"step": 1000
},
{
"epoch": 6.4,
"learning_rate": 4.392222222222223e-05,
"loss": 0.0433,
"step": 1050
},
{
"epoch": 6.71,
"learning_rate": 4.3366666666666666e-05,
"loss": 0.0462,
"step": 1100
},
{
"epoch": 7.01,
"learning_rate": 4.281111111111111e-05,
"loss": 0.0449,
"step": 1150
},
{
"epoch": 7.32,
"learning_rate": 4.225555555555556e-05,
"loss": 0.0308,
"step": 1200
},
{
"epoch": 7.62,
"learning_rate": 4.17e-05,
"loss": 0.0308,
"step": 1250
},
{
"epoch": 7.93,
"learning_rate": 4.114444444444445e-05,
"loss": 0.0335,
"step": 1300
},
{
"epoch": 8.23,
"learning_rate": 4.058888888888889e-05,
"loss": 0.0267,
"step": 1350
},
{
"epoch": 8.54,
"learning_rate": 4.0033333333333335e-05,
"loss": 0.025,
"step": 1400
},
{
"epoch": 8.84,
"learning_rate": 3.947777777777778e-05,
"loss": 0.0241,
"step": 1450
},
{
"epoch": 9.15,
"learning_rate": 3.892222222222223e-05,
"loss": 0.0216,
"step": 1500
},
{
"epoch": 9.45,
"learning_rate": 3.8366666666666666e-05,
"loss": 0.0198,
"step": 1550
},
{
"epoch": 9.76,
"learning_rate": 3.781111111111112e-05,
"loss": 0.0193,
"step": 1600
},
{
"epoch": 10.06,
"learning_rate": 3.7255555555555554e-05,
"loss": 0.0171,
"step": 1650
},
{
"epoch": 10.37,
"learning_rate": 3.6700000000000004e-05,
"loss": 0.013,
"step": 1700
},
{
"epoch": 10.67,
"learning_rate": 3.614444444444445e-05,
"loss": 0.0164,
"step": 1750
},
{
"epoch": 10.98,
"learning_rate": 3.5588888888888885e-05,
"loss": 0.0172,
"step": 1800
},
{
"epoch": 11.28,
"learning_rate": 3.5033333333333336e-05,
"loss": 0.0114,
"step": 1850
},
{
"epoch": 11.59,
"learning_rate": 3.447777777777778e-05,
"loss": 0.0116,
"step": 1900
},
{
"epoch": 11.89,
"learning_rate": 3.392222222222222e-05,
"loss": 0.0126,
"step": 1950
},
{
"epoch": 12.2,
"learning_rate": 3.336666666666667e-05,
"loss": 0.0107,
"step": 2000
},
{
"epoch": 12.2,
"eval_loss": 0.47065049409866333,
"eval_runtime": 1115.4091,
"eval_samples_per_second": 4.721,
"eval_steps_per_second": 0.148,
"eval_wer": 26.267079758500163,
"step": 2000
},
{
"epoch": 12.5,
"learning_rate": 3.281111111111112e-05,
"loss": 0.0091,
"step": 2050
},
{
"epoch": 12.8,
"learning_rate": 3.2255555555555554e-05,
"loss": 0.0102,
"step": 2100
},
{
"epoch": 13.11,
"learning_rate": 3.1700000000000005e-05,
"loss": 0.0092,
"step": 2150
},
{
"epoch": 13.41,
"learning_rate": 3.114444444444445e-05,
"loss": 0.0067,
"step": 2200
},
{
"epoch": 13.72,
"learning_rate": 3.058888888888889e-05,
"loss": 0.0086,
"step": 2250
},
{
"epoch": 14.02,
"learning_rate": 3.0033333333333336e-05,
"loss": 0.0082,
"step": 2300
},
{
"epoch": 14.33,
"learning_rate": 2.9477777777777783e-05,
"loss": 0.0064,
"step": 2350
},
{
"epoch": 14.63,
"learning_rate": 2.8922222222222224e-05,
"loss": 0.0065,
"step": 2400
},
{
"epoch": 14.94,
"learning_rate": 2.836666666666667e-05,
"loss": 0.0051,
"step": 2450
},
{
"epoch": 15.24,
"learning_rate": 2.781111111111111e-05,
"loss": 0.0055,
"step": 2500
},
{
"epoch": 15.55,
"learning_rate": 2.7255555555555555e-05,
"loss": 0.0051,
"step": 2550
},
{
"epoch": 15.85,
"learning_rate": 2.6700000000000002e-05,
"loss": 0.0039,
"step": 2600
},
{
"epoch": 16.16,
"learning_rate": 2.6144444444444442e-05,
"loss": 0.0038,
"step": 2650
},
{
"epoch": 16.46,
"learning_rate": 2.558888888888889e-05,
"loss": 0.0028,
"step": 2700
},
{
"epoch": 16.77,
"learning_rate": 2.5033333333333336e-05,
"loss": 0.0025,
"step": 2750
},
{
"epoch": 17.07,
"learning_rate": 2.447777777777778e-05,
"loss": 0.0024,
"step": 2800
},
{
"epoch": 17.38,
"learning_rate": 2.3922222222222224e-05,
"loss": 0.0021,
"step": 2850
},
{
"epoch": 17.68,
"learning_rate": 2.3366666666666668e-05,
"loss": 0.0022,
"step": 2900
},
{
"epoch": 17.99,
"learning_rate": 2.281111111111111e-05,
"loss": 0.0025,
"step": 2950
},
{
"epoch": 18.29,
"learning_rate": 2.225555555555556e-05,
"loss": 0.0026,
"step": 3000
},
{
"epoch": 18.29,
"eval_loss": 0.46428239345550537,
"eval_runtime": 1090.3134,
"eval_samples_per_second": 4.83,
"eval_steps_per_second": 0.151,
"eval_wer": 24.67627899586908,
"step": 3000
},
{
"epoch": 18.6,
"learning_rate": 2.1700000000000002e-05,
"loss": 0.0018,
"step": 3050
},
{
"epoch": 18.9,
"learning_rate": 2.1144444444444446e-05,
"loss": 0.0015,
"step": 3100
},
{
"epoch": 19.21,
"learning_rate": 2.058888888888889e-05,
"loss": 0.0019,
"step": 3150
},
{
"epoch": 19.51,
"learning_rate": 2.0033333333333334e-05,
"loss": 0.0011,
"step": 3200
},
{
"epoch": 19.82,
"learning_rate": 1.9477777777777777e-05,
"loss": 0.0012,
"step": 3250
},
{
"epoch": 20.12,
"learning_rate": 1.8922222222222224e-05,
"loss": 0.001,
"step": 3300
},
{
"epoch": 20.43,
"learning_rate": 1.8366666666666668e-05,
"loss": 0.0014,
"step": 3350
},
{
"epoch": 20.73,
"learning_rate": 1.7811111111111112e-05,
"loss": 0.0015,
"step": 3400
},
{
"epoch": 21.04,
"learning_rate": 1.7255555555555556e-05,
"loss": 0.0015,
"step": 3450
},
{
"epoch": 21.34,
"learning_rate": 1.6700000000000003e-05,
"loss": 0.0017,
"step": 3500
},
{
"epoch": 21.65,
"learning_rate": 1.6144444444444446e-05,
"loss": 0.0016,
"step": 3550
},
{
"epoch": 21.95,
"learning_rate": 1.558888888888889e-05,
"loss": 0.0014,
"step": 3600
},
{
"epoch": 22.26,
"learning_rate": 1.5033333333333336e-05,
"loss": 0.001,
"step": 3650
},
{
"epoch": 22.56,
"learning_rate": 1.447777777777778e-05,
"loss": 0.0009,
"step": 3700
},
{
"epoch": 22.87,
"learning_rate": 1.3922222222222223e-05,
"loss": 0.001,
"step": 3750
},
{
"epoch": 23.17,
"learning_rate": 1.3366666666666667e-05,
"loss": 0.001,
"step": 3800
},
{
"epoch": 23.48,
"learning_rate": 1.2811111111111112e-05,
"loss": 0.0009,
"step": 3850
},
{
"epoch": 23.78,
"learning_rate": 1.2255555555555556e-05,
"loss": 0.0009,
"step": 3900
},
{
"epoch": 24.09,
"learning_rate": 1.1700000000000001e-05,
"loss": 0.001,
"step": 3950
},
{
"epoch": 24.39,
"learning_rate": 1.1144444444444445e-05,
"loss": 0.0007,
"step": 4000
},
{
"epoch": 24.39,
"eval_loss": 0.46286216378211975,
"eval_runtime": 1094.5709,
"eval_samples_per_second": 4.811,
"eval_steps_per_second": 0.151,
"eval_wer": 23.802430886558625,
"step": 4000
},
{
"epoch": 24.7,
"learning_rate": 1.058888888888889e-05,
"loss": 0.0009,
"step": 4050
},
{
"epoch": 25.0,
"learning_rate": 1.0033333333333333e-05,
"loss": 0.0007,
"step": 4100
},
{
"epoch": 25.3,
"learning_rate": 9.477777777777778e-06,
"loss": 0.0005,
"step": 4150
},
{
"epoch": 25.61,
"learning_rate": 8.922222222222222e-06,
"loss": 0.0006,
"step": 4200
},
{
"epoch": 25.91,
"learning_rate": 8.366666666666667e-06,
"loss": 0.0004,
"step": 4250
},
{
"epoch": 26.22,
"learning_rate": 7.811111111111113e-06,
"loss": 0.0004,
"step": 4300
},
{
"epoch": 26.52,
"learning_rate": 7.255555555555556e-06,
"loss": 0.0005,
"step": 4350
},
{
"epoch": 26.83,
"learning_rate": 6.700000000000001e-06,
"loss": 0.0004,
"step": 4400
},
{
"epoch": 27.13,
"learning_rate": 6.144444444444445e-06,
"loss": 0.0005,
"step": 4450
},
{
"epoch": 27.44,
"learning_rate": 5.588888888888889e-06,
"loss": 0.0005,
"step": 4500
},
{
"epoch": 27.74,
"learning_rate": 5.033333333333334e-06,
"loss": 0.0006,
"step": 4550
},
{
"epoch": 28.05,
"learning_rate": 4.477777777777778e-06,
"loss": 0.0004,
"step": 4600
},
{
"epoch": 28.35,
"learning_rate": 3.922222222222222e-06,
"loss": 0.0003,
"step": 4650
},
{
"epoch": 28.66,
"learning_rate": 3.3666666666666665e-06,
"loss": 0.0005,
"step": 4700
},
{
"epoch": 28.96,
"learning_rate": 2.811111111111111e-06,
"loss": 0.0003,
"step": 4750
},
{
"epoch": 29.27,
"learning_rate": 2.2555555555555557e-06,
"loss": 0.0003,
"step": 4800
},
{
"epoch": 29.57,
"learning_rate": 1.7000000000000002e-06,
"loss": 0.0003,
"step": 4850
},
{
"epoch": 29.88,
"learning_rate": 1.1444444444444446e-06,
"loss": 0.0004,
"step": 4900
},
{
"epoch": 30.18,
"learning_rate": 5.888888888888889e-07,
"loss": 0.0004,
"step": 4950
},
{
"epoch": 30.49,
"learning_rate": 3.3333333333333334e-08,
"loss": 0.0004,
"step": 5000
},
{
"epoch": 30.49,
"eval_loss": 0.46102383732795715,
"eval_runtime": 1096.0988,
"eval_samples_per_second": 4.804,
"eval_steps_per_second": 0.151,
"eval_wer": 23.061646012074995,
"step": 5000
},
{
"epoch": 30.49,
"step": 5000,
"total_flos": 9.23300129783808e+19,
"train_loss": 0.11097193325944245,
"train_runtime": 18201.3214,
"train_samples_per_second": 17.581,
"train_steps_per_second": 0.275
}
],
"max_steps": 5000,
"num_train_epochs": 31,
"total_flos": 9.23300129783808e+19,
"trial_name": null,
"trial_params": null
}