{
  "best_metric": 0.8710034489631653,
  "best_model_checkpoint": "./checkpoint-400",
  "epoch": 12.5,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 3.3709,
      "step": 10
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.333333333333334e-06,
      "loss": 2.8912,
      "step": 20
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.666666666666668e-06,
      "loss": 2.2169,
      "step": 30
    },
    {
      "epoch": 1.0,
      "learning_rate": 9.777777777777779e-06,
      "loss": 1.6923,
      "step": 40
    },
    {
      "epoch": 1.25,
      "learning_rate": 9.407407407407408e-06,
      "loss": 1.3934,
      "step": 50
    },
    {
      "epoch": 1.5,
      "learning_rate": 9.037037037037037e-06,
      "loss": 1.2336,
      "step": 60
    },
    {
      "epoch": 1.75,
      "learning_rate": 8.666666666666668e-06,
      "loss": 1.1522,
      "step": 70
    },
    {
      "epoch": 2.0,
      "learning_rate": 8.296296296296297e-06,
      "loss": 1.1339,
      "step": 80
    },
    {
      "epoch": 2.25,
      "learning_rate": 7.925925925925926e-06,
      "loss": 0.9576,
      "step": 90
    },
    {
      "epoch": 2.5,
      "learning_rate": 7.555555555555556e-06,
      "loss": 0.9153,
      "step": 100
    },
    {
      "epoch": 2.5,
      "eval_loss": 1.0240015983581543,
      "eval_runtime": 182.7755,
      "eval_samples_per_second": 2.801,
      "eval_steps_per_second": 0.088,
      "eval_wer": 68.9863608183509,
      "step": 100
    },
    {
      "epoch": 2.75,
      "learning_rate": 7.185185185185186e-06,
      "loss": 0.8884,
      "step": 110
    },
    {
      "epoch": 3.0,
      "learning_rate": 6.814814814814815e-06,
      "loss": 0.8334,
      "step": 120
    },
    {
      "epoch": 3.25,
      "learning_rate": 6.444444444444445e-06,
      "loss": 0.7989,
      "step": 130
    },
    {
      "epoch": 3.5,
      "learning_rate": 6.0740740740740745e-06,
      "loss": 0.7718,
      "step": 140
    },
    {
      "epoch": 3.75,
      "learning_rate": 5.7037037037037045e-06,
      "loss": 0.7671,
      "step": 150
    },
    {
      "epoch": 4.0,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.8208,
      "step": 160
    },
    {
      "epoch": 4.25,
      "learning_rate": 4.962962962962964e-06,
      "loss": 0.7219,
      "step": 170
    },
    {
      "epoch": 4.5,
      "learning_rate": 4.592592592592593e-06,
      "loss": 0.7034,
      "step": 180
    },
    {
      "epoch": 4.75,
      "learning_rate": 4.222222222222223e-06,
      "loss": 0.6966,
      "step": 190
    },
    {
      "epoch": 5.0,
      "learning_rate": 3.851851851851852e-06,
      "loss": 0.6865,
      "step": 200
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.8967972993850708,
      "eval_runtime": 172.9658,
      "eval_samples_per_second": 2.96,
      "eval_steps_per_second": 0.093,
      "eval_wer": 61.7660411622276,
      "step": 200
    },
    {
      "epoch": 5.25,
      "learning_rate": 6.255319148936171e-06,
      "loss": 0.6414,
      "step": 210
    },
    {
      "epoch": 5.5,
      "learning_rate": 6.042553191489362e-06,
      "loss": 0.6552,
      "step": 220
    },
    {
      "epoch": 5.75,
      "learning_rate": 5.829787234042553e-06,
      "loss": 0.6445,
      "step": 230
    },
    {
      "epoch": 6.0,
      "learning_rate": 5.617021276595746e-06,
      "loss": 0.622,
      "step": 240
    },
    {
      "epoch": 6.25,
      "learning_rate": 5.404255319148937e-06,
      "loss": 0.6004,
      "step": 250
    },
    {
      "epoch": 6.5,
      "learning_rate": 5.191489361702128e-06,
      "loss": 0.5882,
      "step": 260
    },
    {
      "epoch": 6.75,
      "learning_rate": 4.9787234042553195e-06,
      "loss": 0.6016,
      "step": 270
    },
    {
      "epoch": 7.0,
      "learning_rate": 4.765957446808511e-06,
      "loss": 0.5682,
      "step": 280
    },
    {
      "epoch": 7.25,
      "learning_rate": 4.553191489361702e-06,
      "loss": 0.5672,
      "step": 290
    },
    {
      "epoch": 7.5,
      "learning_rate": 4.340425531914894e-06,
      "loss": 0.5474,
      "step": 300
    },
    {
      "epoch": 7.5,
      "eval_loss": 0.8744030594825745,
      "eval_runtime": 190.8413,
      "eval_samples_per_second": 2.683,
      "eval_steps_per_second": 0.084,
      "eval_wer": 60.55538740920097,
      "step": 300
    },
    {
      "epoch": 7.75,
      "learning_rate": 4.127659574468085e-06,
      "loss": 0.5432,
      "step": 310
    },
    {
      "epoch": 8.0,
      "learning_rate": 3.914893617021277e-06,
      "loss": 0.534,
      "step": 320
    },
    {
      "epoch": 8.25,
      "learning_rate": 3.702127659574468e-06,
      "loss": 0.5164,
      "step": 330
    },
    {
      "epoch": 8.5,
      "learning_rate": 3.48936170212766e-06,
      "loss": 0.5223,
      "step": 340
    },
    {
      "epoch": 8.75,
      "learning_rate": 3.276595744680851e-06,
      "loss": 0.5126,
      "step": 350
    },
    {
      "epoch": 9.0,
      "learning_rate": 3.0638297872340428e-06,
      "loss": 0.5059,
      "step": 360
    },
    {
      "epoch": 9.25,
      "learning_rate": 2.8510638297872346e-06,
      "loss": 0.4958,
      "step": 370
    },
    {
      "epoch": 9.5,
      "learning_rate": 2.6382978723404256e-06,
      "loss": 0.4928,
      "step": 380
    },
    {
      "epoch": 9.75,
      "learning_rate": 2.425531914893617e-06,
      "loss": 0.4856,
      "step": 390
    },
    {
      "epoch": 10.0,
      "learning_rate": 2.2127659574468085e-06,
      "loss": 0.4646,
      "step": 400
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.8710034489631653,
      "eval_runtime": 173.1105,
      "eval_samples_per_second": 2.958,
      "eval_steps_per_second": 0.092,
      "eval_wer": 60.05599273607748,
      "step": 400
    },
    {
      "epoch": 10.25,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.4711,
      "step": 410
    },
    {
      "epoch": 10.5,
      "learning_rate": 1.7872340425531918e-06,
      "loss": 0.472,
      "step": 420
    },
    {
      "epoch": 10.75,
      "learning_rate": 1.5744680851063832e-06,
      "loss": 0.4745,
      "step": 430
    },
    {
      "epoch": 11.0,
      "learning_rate": 1.3617021276595746e-06,
      "loss": 0.4546,
      "step": 440
    },
    {
      "epoch": 11.25,
      "learning_rate": 1.148936170212766e-06,
      "loss": 0.4702,
      "step": 450
    },
    {
      "epoch": 11.5,
      "learning_rate": 9.361702127659575e-07,
      "loss": 0.448,
      "step": 460
    },
    {
      "epoch": 11.75,
      "learning_rate": 7.234042553191489e-07,
      "loss": 0.4584,
      "step": 470
    },
    {
      "epoch": 12.0,
      "learning_rate": 5.106382978723404e-07,
      "loss": 0.4719,
      "step": 480
    },
    {
      "epoch": 12.25,
      "learning_rate": 2.9787234042553196e-07,
      "loss": 0.45,
      "step": 490
    },
    {
      "epoch": 12.5,
      "learning_rate": 8.510638297872341e-08,
      "loss": 0.4557,
      "step": 500
    },
    {
      "epoch": 12.5,
      "eval_loss": 0.873198390007019,
      "eval_runtime": 187.2044,
      "eval_samples_per_second": 2.735,
      "eval_steps_per_second": 0.085,
      "eval_wer": 59.465799031477,
      "step": 500
    },
    {
      "epoch": 12.5,
      "step": 500,
      "total_flos": 2.02648572002304e+18,
      "train_loss": 0.31471561336517334,
      "train_runtime": 1275.0957,
      "train_samples_per_second": 25.096,
      "train_steps_per_second": 0.392
    }
  ],
  "max_steps": 500,
  "num_train_epochs": 13,
  "total_flos": 2.02648572002304e+18,
  "trial_name": null,
  "trial_params": null
}