{ "best_metric": 1.0327022375215147, "best_model_checkpoint": "./whisper-medium-attempt2-1000-orders-eleven-labs/checkpoint-900", "epoch": 5.645161290322581, "eval_steps": 100, "global_step": 1400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10080645161290322, "grad_norm": 40.95878982543945, "learning_rate": 4.2000000000000006e-07, "loss": 3.6067, "step": 25 }, { "epoch": 0.20161290322580644, "grad_norm": 28.143661499023438, "learning_rate": 9.200000000000001e-07, "loss": 2.7859, "step": 50 }, { "epoch": 0.3024193548387097, "grad_norm": 10.631092071533203, "learning_rate": 1.42e-06, "loss": 0.9512, "step": 75 }, { "epoch": 0.4032258064516129, "grad_norm": 1.839168906211853, "learning_rate": 1.9200000000000003e-06, "loss": 0.0668, "step": 100 }, { "epoch": 0.4032258064516129, "eval_loss": 0.03882083296775818, "eval_runtime": 54.3208, "eval_samples_per_second": 3.848, "eval_steps_per_second": 0.497, "eval_wer": 17.38382099827883, "step": 100 }, { "epoch": 0.5040322580645161, "grad_norm": 5.860921859741211, "learning_rate": 2.42e-06, "loss": 0.0347, "step": 125 }, { "epoch": 0.6048387096774194, "grad_norm": 4.118633270263672, "learning_rate": 2.92e-06, "loss": 0.0292, "step": 150 }, { "epoch": 0.7056451612903226, "grad_norm": 0.7270166873931885, "learning_rate": 3.4200000000000007e-06, "loss": 0.0204, "step": 175 }, { "epoch": 0.8064516129032258, "grad_norm": 0.20527507364749908, "learning_rate": 3.920000000000001e-06, "loss": 0.0142, "step": 200 }, { "epoch": 0.8064516129032258, "eval_loss": 0.00608229311183095, "eval_runtime": 54.4284, "eval_samples_per_second": 3.84, "eval_steps_per_second": 0.496, "eval_wer": 11.359724612736661, "step": 200 }, { "epoch": 0.907258064516129, "grad_norm": 3.9435720443725586, "learning_rate": 4.42e-06, "loss": 0.0091, "step": 225 }, { "epoch": 1.0080645161290323, "grad_norm": 0.173879012465477, "learning_rate": 4.92e-06, "loss": 0.0112, "step": 250 }, { "epoch": 1.1088709677419355, "grad_norm": 0.018560703843832016, "learning_rate": 5.420000000000001e-06, "loss": 0.0063, "step": 275 }, { "epoch": 1.2096774193548387, "grad_norm": 4.80478048324585, "learning_rate": 5.92e-06, "loss": 0.0075, "step": 300 }, { "epoch": 1.2096774193548387, "eval_loss": 0.007460152264684439, "eval_runtime": 54.3247, "eval_samples_per_second": 3.847, "eval_steps_per_second": 0.497, "eval_wer": 9.63855421686747, "step": 300 }, { "epoch": 1.310483870967742, "grad_norm": 0.022897183895111084, "learning_rate": 6.42e-06, "loss": 0.0163, "step": 325 }, { "epoch": 1.4112903225806452, "grad_norm": 0.06287501752376556, "learning_rate": 6.92e-06, "loss": 0.0078, "step": 350 }, { "epoch": 1.5120967741935485, "grad_norm": 0.019055448472499847, "learning_rate": 7.420000000000001e-06, "loss": 0.0057, "step": 375 }, { "epoch": 1.6129032258064515, "grad_norm": 0.04413852468132973, "learning_rate": 7.92e-06, "loss": 0.0073, "step": 400 }, { "epoch": 1.6129032258064515, "eval_loss": 0.010435141623020172, "eval_runtime": 54.1901, "eval_samples_per_second": 3.857, "eval_steps_per_second": 0.498, "eval_wer": 7.74526678141136, "step": 400 }, { "epoch": 1.713709677419355, "grad_norm": 0.6023885607719421, "learning_rate": 8.42e-06, "loss": 0.0173, "step": 425 }, { "epoch": 1.814516129032258, "grad_norm": 0.062626414000988, "learning_rate": 8.920000000000001e-06, "loss": 0.0114, "step": 450 }, { "epoch": 1.9153225806451613, "grad_norm": 4.65421724319458, "learning_rate": 9.42e-06, "loss": 0.0046, "step": 475 }, { "epoch": 2.0161290322580645, "grad_norm": 0.11005760729312897, "learning_rate": 9.920000000000002e-06, "loss": 0.0087, "step": 500 }, { "epoch": 2.0161290322580645, "eval_loss": 0.012454940006136894, "eval_runtime": 54.5944, "eval_samples_per_second": 3.828, "eval_steps_per_second": 0.495, "eval_wer": 2.9259896729776247, "step": 500 }, { "epoch": 2.1169354838709675, "grad_norm": 0.016795210540294647, "learning_rate": 9.86e-06, "loss": 0.0076, "step": 525 }, { "epoch": 2.217741935483871, "grad_norm": 0.1811501681804657, "learning_rate": 9.693333333333334e-06, "loss": 0.007, "step": 550 }, { "epoch": 2.318548387096774, "grad_norm": 0.0245444867759943, "learning_rate": 9.526666666666668e-06, "loss": 0.0099, "step": 575 }, { "epoch": 2.4193548387096775, "grad_norm": 0.06346794962882996, "learning_rate": 9.360000000000002e-06, "loss": 0.0046, "step": 600 }, { "epoch": 2.4193548387096775, "eval_loss": 0.00803467072546482, "eval_runtime": 54.4661, "eval_samples_per_second": 3.837, "eval_steps_per_second": 0.496, "eval_wer": 1.549053356282272, "step": 600 }, { "epoch": 2.5201612903225805, "grad_norm": 0.037542328238487244, "learning_rate": 9.193333333333334e-06, "loss": 0.0065, "step": 625 }, { "epoch": 2.620967741935484, "grad_norm": 2.8664350509643555, "learning_rate": 9.026666666666666e-06, "loss": 0.0092, "step": 650 }, { "epoch": 2.721774193548387, "grad_norm": 4.69785737991333, "learning_rate": 8.860000000000002e-06, "loss": 0.0052, "step": 675 }, { "epoch": 2.8225806451612905, "grad_norm": 0.005957436747848988, "learning_rate": 8.693333333333334e-06, "loss": 0.0087, "step": 700 }, { "epoch": 2.8225806451612905, "eval_loss": 0.003936439286917448, "eval_runtime": 54.4976, "eval_samples_per_second": 3.835, "eval_steps_per_second": 0.495, "eval_wer": 1.7211703958691909, "step": 700 }, { "epoch": 2.9233870967741935, "grad_norm": 0.008426151238381863, "learning_rate": 8.526666666666667e-06, "loss": 0.0059, "step": 725 }, { "epoch": 3.024193548387097, "grad_norm": 0.004362072329968214, "learning_rate": 8.36e-06, "loss": 0.0054, "step": 750 }, { "epoch": 3.125, "grad_norm": 1.2939238548278809, "learning_rate": 8.193333333333335e-06, "loss": 0.005, "step": 775 }, { "epoch": 3.225806451612903, "grad_norm": 0.0022686896845698357, "learning_rate": 8.026666666666667e-06, "loss": 0.0066, "step": 800 }, { "epoch": 3.225806451612903, "eval_loss": 0.00420153234153986, "eval_runtime": 54.3822, "eval_samples_per_second": 3.843, "eval_steps_per_second": 0.496, "eval_wer": 1.376936316695353, "step": 800 }, { "epoch": 3.3266129032258065, "grad_norm": 0.0034014617558568716, "learning_rate": 7.860000000000001e-06, "loss": 0.0063, "step": 825 }, { "epoch": 3.4274193548387095, "grad_norm": 7.198966026306152, "learning_rate": 7.693333333333333e-06, "loss": 0.0052, "step": 850 }, { "epoch": 3.528225806451613, "grad_norm": 0.0018629450350999832, "learning_rate": 7.526666666666668e-06, "loss": 0.003, "step": 875 }, { "epoch": 3.629032258064516, "grad_norm": 3.725576877593994, "learning_rate": 7.360000000000001e-06, "loss": 0.0032, "step": 900 }, { "epoch": 3.629032258064516, "eval_loss": 0.009456031955778599, "eval_runtime": 54.5839, "eval_samples_per_second": 3.829, "eval_steps_per_second": 0.495, "eval_wer": 1.0327022375215147, "step": 900 }, { "epoch": 3.7298387096774195, "grad_norm": 0.3921663761138916, "learning_rate": 7.1933333333333345e-06, "loss": 0.0041, "step": 925 }, { "epoch": 3.8306451612903225, "grad_norm": 2.3925461769104004, "learning_rate": 7.0266666666666674e-06, "loss": 0.0017, "step": 950 }, { "epoch": 3.931451612903226, "grad_norm": 0.002618137514218688, "learning_rate": 6.860000000000001e-06, "loss": 0.0047, "step": 975 }, { "epoch": 4.032258064516129, "grad_norm": 3.606403112411499, "learning_rate": 6.693333333333334e-06, "loss": 0.0027, "step": 1000 }, { "epoch": 4.032258064516129, "eval_loss": 0.01142825372517109, "eval_runtime": 54.5807, "eval_samples_per_second": 3.829, "eval_steps_per_second": 0.495, "eval_wer": 1.549053356282272, "step": 1000 }, { "epoch": 4.133064516129032, "grad_norm": 2.7271430492401123, "learning_rate": 6.526666666666666e-06, "loss": 0.0078, "step": 1025 }, { "epoch": 4.233870967741935, "grad_norm": 3.484804153442383, "learning_rate": 6.360000000000001e-06, "loss": 0.0026, "step": 1050 }, { "epoch": 4.334677419354839, "grad_norm": 0.0015495093539357185, "learning_rate": 6.193333333333333e-06, "loss": 0.0017, "step": 1075 }, { "epoch": 4.435483870967742, "grad_norm": 0.0017610510112717748, "learning_rate": 6.026666666666668e-06, "loss": 0.0021, "step": 1100 }, { "epoch": 4.435483870967742, "eval_loss": 0.009943087585270405, "eval_runtime": 54.465, "eval_samples_per_second": 3.837, "eval_steps_per_second": 0.496, "eval_wer": 1.7211703958691909, "step": 1100 }, { "epoch": 4.536290322580645, "grad_norm": 0.0015177098102867603, "learning_rate": 5.86e-06, "loss": 0.0022, "step": 1125 }, { "epoch": 4.637096774193548, "grad_norm": 0.06703988462686539, "learning_rate": 5.6933333333333344e-06, "loss": 0.0099, "step": 1150 }, { "epoch": 4.737903225806452, "grad_norm": 0.0013143372489139438, "learning_rate": 5.5266666666666666e-06, "loss": 0.0028, "step": 1175 }, { "epoch": 4.838709677419355, "grad_norm": 3.9103496074676514, "learning_rate": 5.36e-06, "loss": 0.0039, "step": 1200 }, { "epoch": 4.838709677419355, "eval_loss": 0.0121237151324749, "eval_runtime": 54.615, "eval_samples_per_second": 3.827, "eval_steps_per_second": 0.494, "eval_wer": 1.8932874354561102, "step": 1200 }, { "epoch": 4.939516129032258, "grad_norm": 0.015102783218026161, "learning_rate": 5.193333333333333e-06, "loss": 0.0042, "step": 1225 }, { "epoch": 5.040322580645161, "grad_norm": 0.0012233111774548888, "learning_rate": 5.026666666666667e-06, "loss": 0.0013, "step": 1250 }, { "epoch": 5.141129032258065, "grad_norm": 0.004319190047681332, "learning_rate": 4.86e-06, "loss": 0.002, "step": 1275 }, { "epoch": 5.241935483870968, "grad_norm": 0.008857857435941696, "learning_rate": 4.693333333333334e-06, "loss": 0.0017, "step": 1300 }, { "epoch": 5.241935483870968, "eval_loss": 0.012567833997309208, "eval_runtime": 54.7115, "eval_samples_per_second": 3.82, "eval_steps_per_second": 0.493, "eval_wer": 1.376936316695353, "step": 1300 }, { "epoch": 5.342741935483871, "grad_norm": 0.00218038447201252, "learning_rate": 4.526666666666667e-06, "loss": 0.0017, "step": 1325 }, { "epoch": 5.443548387096774, "grad_norm": 0.3447363078594208, "learning_rate": 4.360000000000001e-06, "loss": 0.0035, "step": 1350 }, { "epoch": 5.544354838709677, "grad_norm": 0.6128267049789429, "learning_rate": 4.1933333333333336e-06, "loss": 0.003, "step": 1375 }, { "epoch": 5.645161290322581, "grad_norm": 0.019433248788118362, "learning_rate": 4.026666666666667e-06, "loss": 0.0033, "step": 1400 }, { "epoch": 5.645161290322581, "eval_loss": 0.00934593565762043, "eval_runtime": 54.3938, "eval_samples_per_second": 3.842, "eval_steps_per_second": 0.496, "eval_wer": 1.8932874354561102, "step": 1400 } ], "logging_steps": 25, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.2785004560384e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }