{ "best_metric": 0.8710034489631653, "best_model_checkpoint": "./checkpoint-400", "epoch": 12.5, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 2.0000000000000003e-06, "loss": 3.3709, "step": 10 }, { "epoch": 0.5, "learning_rate": 5.333333333333334e-06, "loss": 2.8912, "step": 20 }, { "epoch": 0.75, "learning_rate": 8.666666666666668e-06, "loss": 2.2169, "step": 30 }, { "epoch": 1.0, "learning_rate": 9.777777777777779e-06, "loss": 1.6923, "step": 40 }, { "epoch": 1.25, "learning_rate": 9.407407407407408e-06, "loss": 1.3934, "step": 50 }, { "epoch": 1.5, "learning_rate": 9.037037037037037e-06, "loss": 1.2336, "step": 60 }, { "epoch": 1.75, "learning_rate": 8.666666666666668e-06, "loss": 1.1522, "step": 70 }, { "epoch": 2.0, "learning_rate": 8.296296296296297e-06, "loss": 1.1339, "step": 80 }, { "epoch": 2.25, "learning_rate": 7.925925925925926e-06, "loss": 0.9576, "step": 90 }, { "epoch": 2.5, "learning_rate": 7.555555555555556e-06, "loss": 0.9153, "step": 100 }, { "epoch": 2.5, "eval_loss": 1.0240015983581543, "eval_runtime": 182.7755, "eval_samples_per_second": 2.801, "eval_steps_per_second": 0.088, "eval_wer": 68.9863608183509, "step": 100 }, { "epoch": 2.75, "learning_rate": 7.185185185185186e-06, "loss": 0.8884, "step": 110 }, { "epoch": 3.0, "learning_rate": 6.814814814814815e-06, "loss": 0.8334, "step": 120 }, { "epoch": 3.25, "learning_rate": 6.444444444444445e-06, "loss": 0.7989, "step": 130 }, { "epoch": 3.5, "learning_rate": 6.0740740740740745e-06, "loss": 0.7718, "step": 140 }, { "epoch": 3.75, "learning_rate": 5.7037037037037045e-06, "loss": 0.7671, "step": 150 }, { "epoch": 4.0, "learning_rate": 5.333333333333334e-06, "loss": 0.8208, "step": 160 }, { "epoch": 4.25, "learning_rate": 4.962962962962964e-06, "loss": 0.7219, "step": 170 }, { "epoch": 4.5, "learning_rate": 4.592592592592593e-06, "loss": 0.7034, "step": 180 }, { "epoch": 4.75, "learning_rate": 4.222222222222223e-06, "loss": 0.6966, "step": 190 }, { "epoch": 5.0, "learning_rate": 3.851851851851852e-06, "loss": 0.6865, "step": 200 }, { "epoch": 5.0, "eval_loss": 0.8967972993850708, "eval_runtime": 172.9658, "eval_samples_per_second": 2.96, "eval_steps_per_second": 0.093, "eval_wer": 61.7660411622276, "step": 200 }, { "epoch": 5.25, "learning_rate": 6.255319148936171e-06, "loss": 0.6414, "step": 210 }, { "epoch": 5.5, "learning_rate": 6.042553191489362e-06, "loss": 0.6552, "step": 220 }, { "epoch": 5.75, "learning_rate": 5.829787234042553e-06, "loss": 0.6445, "step": 230 }, { "epoch": 6.0, "learning_rate": 5.617021276595746e-06, "loss": 0.622, "step": 240 }, { "epoch": 6.25, "learning_rate": 5.404255319148937e-06, "loss": 0.6004, "step": 250 }, { "epoch": 6.5, "learning_rate": 5.191489361702128e-06, "loss": 0.5882, "step": 260 }, { "epoch": 6.75, "learning_rate": 4.9787234042553195e-06, "loss": 0.6016, "step": 270 }, { "epoch": 7.0, "learning_rate": 4.765957446808511e-06, "loss": 0.5682, "step": 280 }, { "epoch": 7.25, "learning_rate": 4.553191489361702e-06, "loss": 0.5672, "step": 290 }, { "epoch": 7.5, "learning_rate": 4.340425531914894e-06, "loss": 0.5474, "step": 300 }, { "epoch": 7.5, "eval_loss": 0.8744030594825745, "eval_runtime": 190.8413, "eval_samples_per_second": 2.683, "eval_steps_per_second": 0.084, "eval_wer": 60.55538740920097, "step": 300 }, { "epoch": 7.75, "learning_rate": 4.127659574468085e-06, "loss": 0.5432, "step": 310 }, { "epoch": 8.0, "learning_rate": 3.914893617021277e-06, "loss": 0.534, "step": 320 }, { "epoch": 8.25, "learning_rate": 3.702127659574468e-06, "loss": 0.5164, "step": 330 }, { "epoch": 8.5, "learning_rate": 3.48936170212766e-06, "loss": 0.5223, "step": 340 }, { "epoch": 8.75, "learning_rate": 3.276595744680851e-06, "loss": 0.5126, "step": 350 }, { "epoch": 9.0, "learning_rate": 3.0638297872340428e-06, "loss": 0.5059, "step": 360 }, { "epoch": 9.25, "learning_rate": 2.8510638297872346e-06, "loss": 0.4958, "step": 370 }, { "epoch": 9.5, "learning_rate": 2.6382978723404256e-06, "loss": 0.4928, "step": 380 }, { "epoch": 9.75, "learning_rate": 2.425531914893617e-06, "loss": 0.4856, "step": 390 }, { "epoch": 10.0, "learning_rate": 2.2127659574468085e-06, "loss": 0.4646, "step": 400 }, { "epoch": 10.0, "eval_loss": 0.8710034489631653, "eval_runtime": 173.1105, "eval_samples_per_second": 2.958, "eval_steps_per_second": 0.092, "eval_wer": 60.05599273607748, "step": 400 }, { "epoch": 10.25, "learning_rate": 2.0000000000000003e-06, "loss": 0.4711, "step": 410 }, { "epoch": 10.5, "learning_rate": 1.7872340425531918e-06, "loss": 0.472, "step": 420 }, { "epoch": 10.75, "learning_rate": 1.5744680851063832e-06, "loss": 0.4745, "step": 430 }, { "epoch": 11.0, "learning_rate": 1.3617021276595746e-06, "loss": 0.4546, "step": 440 }, { "epoch": 11.25, "learning_rate": 1.148936170212766e-06, "loss": 0.4702, "step": 450 }, { "epoch": 11.5, "learning_rate": 9.361702127659575e-07, "loss": 0.448, "step": 460 }, { "epoch": 11.75, "learning_rate": 7.234042553191489e-07, "loss": 0.4584, "step": 470 }, { "epoch": 12.0, "learning_rate": 5.106382978723404e-07, "loss": 0.4719, "step": 480 }, { "epoch": 12.25, "learning_rate": 2.9787234042553196e-07, "loss": 0.45, "step": 490 }, { "epoch": 12.5, "learning_rate": 8.510638297872341e-08, "loss": 0.4557, "step": 500 }, { "epoch": 12.5, "eval_loss": 0.873198390007019, "eval_runtime": 187.2044, "eval_samples_per_second": 2.735, "eval_steps_per_second": 0.085, "eval_wer": 59.465799031477, "step": 500 }, { "epoch": 12.5, "step": 500, "total_flos": 2.02648572002304e+18, "train_loss": 0.31471561336517334, "train_runtime": 1275.0957, "train_samples_per_second": 25.096, "train_steps_per_second": 0.392 } ], "max_steps": 500, "num_train_epochs": 13, "total_flos": 2.02648572002304e+18, "trial_name": null, "trial_params": null }