{ "best_metric": 47.85165083672546, "best_model_checkpoint": "./content/drive/MyDrive/kyrgyz_asr/checkpoint-2000", "epoch": 0.946969696969697, "eval_steps": 1000, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011837121212121212, "grad_norm": 38.770484924316406, "learning_rate": 4.4e-07, "loss": 3.1763, "step": 25 }, { "epoch": 0.023674242424242424, "grad_norm": 26.670623779296875, "learning_rate": 9.400000000000001e-07, "loss": 2.904, "step": 50 }, { "epoch": 0.03551136363636364, "grad_norm": 24.12047576904297, "learning_rate": 1.44e-06, "loss": 2.573, "step": 75 }, { "epoch": 0.04734848484848485, "grad_norm": 21.595748901367188, "learning_rate": 1.94e-06, "loss": 2.3106, "step": 100 }, { "epoch": 0.059185606060606064, "grad_norm": 18.133272171020508, "learning_rate": 2.4400000000000004e-06, "loss": 1.9787, "step": 125 }, { "epoch": 0.07102272727272728, "grad_norm": 17.44855499267578, "learning_rate": 2.9400000000000002e-06, "loss": 1.7705, "step": 150 }, { "epoch": 0.08285984848484848, "grad_norm": 16.95909309387207, "learning_rate": 3.44e-06, "loss": 1.5665, "step": 175 }, { "epoch": 0.0946969696969697, "grad_norm": 17.190784454345703, "learning_rate": 3.94e-06, "loss": 1.4962, "step": 200 }, { "epoch": 0.10653409090909091, "grad_norm": 16.988037109375, "learning_rate": 4.440000000000001e-06, "loss": 1.4281, "step": 225 }, { "epoch": 0.11837121212121213, "grad_norm": 14.77700424194336, "learning_rate": 4.94e-06, "loss": 1.3685, "step": 250 }, { "epoch": 0.13020833333333334, "grad_norm": 15.917333602905273, "learning_rate": 5.4400000000000004e-06, "loss": 1.2807, "step": 275 }, { "epoch": 0.14204545454545456, "grad_norm": 14.992497444152832, "learning_rate": 5.94e-06, "loss": 1.1803, "step": 300 }, { "epoch": 0.15388257575757575, "grad_norm": 15.551587104797363, "learning_rate": 6.440000000000001e-06, "loss": 1.148, "step": 325 }, { "epoch": 0.16571969696969696, "grad_norm": 14.418575286865234, "learning_rate": 6.9400000000000005e-06, "loss": 1.0953, "step": 350 }, { "epoch": 0.17755681818181818, "grad_norm": 15.400343894958496, "learning_rate": 7.440000000000001e-06, "loss": 1.0356, "step": 375 }, { "epoch": 0.1893939393939394, "grad_norm": 12.426461219787598, "learning_rate": 7.94e-06, "loss": 0.9864, "step": 400 }, { "epoch": 0.2012310606060606, "grad_norm": 13.831282615661621, "learning_rate": 8.44e-06, "loss": 0.9653, "step": 425 }, { "epoch": 0.21306818181818182, "grad_norm": 15.357656478881836, "learning_rate": 8.94e-06, "loss": 0.9935, "step": 450 }, { "epoch": 0.22490530303030304, "grad_norm": 13.355987548828125, "learning_rate": 9.440000000000001e-06, "loss": 0.9047, "step": 475 }, { "epoch": 0.23674242424242425, "grad_norm": 13.817172050476074, "learning_rate": 9.940000000000001e-06, "loss": 0.9242, "step": 500 }, { "epoch": 0.24857954545454544, "grad_norm": 15.737274169921875, "learning_rate": 9.937142857142858e-06, "loss": 0.8997, "step": 525 }, { "epoch": 0.2604166666666667, "grad_norm": 12.787036895751953, "learning_rate": 9.865714285714285e-06, "loss": 0.8344, "step": 550 }, { "epoch": 0.2722537878787879, "grad_norm": 13.924437522888184, "learning_rate": 9.794285714285714e-06, "loss": 0.8179, "step": 575 }, { "epoch": 0.2840909090909091, "grad_norm": 14.679474830627441, "learning_rate": 9.722857142857143e-06, "loss": 0.7755, "step": 600 }, { "epoch": 0.2959280303030303, "grad_norm": 13.893200874328613, "learning_rate": 9.651428571428572e-06, "loss": 0.7419, "step": 625 }, { "epoch": 0.3077651515151515, "grad_norm": 14.868252754211426, "learning_rate": 9.58e-06, "loss": 0.7708, "step": 650 }, { "epoch": 0.3196022727272727, "grad_norm": 14.574288368225098, "learning_rate": 9.508571428571429e-06, "loss": 0.7333, "step": 675 }, { "epoch": 0.3314393939393939, "grad_norm": 15.690669059753418, "learning_rate": 9.437142857142858e-06, "loss": 0.7006, "step": 700 }, { "epoch": 0.34327651515151514, "grad_norm": 12.136014938354492, "learning_rate": 9.365714285714287e-06, "loss": 0.705, "step": 725 }, { "epoch": 0.35511363636363635, "grad_norm": 13.789889335632324, "learning_rate": 9.294285714285714e-06, "loss": 0.7195, "step": 750 }, { "epoch": 0.36695075757575757, "grad_norm": 11.885604858398438, "learning_rate": 9.222857142857143e-06, "loss": 0.6753, "step": 775 }, { "epoch": 0.3787878787878788, "grad_norm": 12.935856819152832, "learning_rate": 9.151428571428572e-06, "loss": 0.6545, "step": 800 }, { "epoch": 0.390625, "grad_norm": 13.731322288513184, "learning_rate": 9.080000000000001e-06, "loss": 0.7291, "step": 825 }, { "epoch": 0.4024621212121212, "grad_norm": 13.80007553100586, "learning_rate": 9.00857142857143e-06, "loss": 0.6667, "step": 850 }, { "epoch": 0.41429924242424243, "grad_norm": 11.839824676513672, "learning_rate": 8.937142857142857e-06, "loss": 0.6221, "step": 875 }, { "epoch": 0.42613636363636365, "grad_norm": 12.550195693969727, "learning_rate": 8.865714285714287e-06, "loss": 0.6114, "step": 900 }, { "epoch": 0.43797348484848486, "grad_norm": 12.121932029724121, "learning_rate": 8.794285714285716e-06, "loss": 0.652, "step": 925 }, { "epoch": 0.4498106060606061, "grad_norm": 13.047248840332031, "learning_rate": 8.722857142857145e-06, "loss": 0.6201, "step": 950 }, { "epoch": 0.4616477272727273, "grad_norm": 12.148579597473145, "learning_rate": 8.651428571428572e-06, "loss": 0.622, "step": 975 }, { "epoch": 0.4734848484848485, "grad_norm": 14.126455307006836, "learning_rate": 8.580000000000001e-06, "loss": 0.59, "step": 1000 }, { "epoch": 0.4734848484848485, "eval_loss": 0.5916658043861389, "eval_runtime": 413.6011, "eval_samples_per_second": 3.902, "eval_steps_per_second": 0.488, "eval_wer": 60.805065581184984, "step": 1000 }, { "epoch": 0.4853219696969697, "grad_norm": 11.438024520874023, "learning_rate": 8.50857142857143e-06, "loss": 0.6024, "step": 1025 }, { "epoch": 0.4971590909090909, "grad_norm": 11.665973663330078, "learning_rate": 8.437142857142859e-06, "loss": 0.5949, "step": 1050 }, { "epoch": 0.5089962121212122, "grad_norm": 12.14466381072998, "learning_rate": 8.365714285714286e-06, "loss": 0.5825, "step": 1075 }, { "epoch": 0.5208333333333334, "grad_norm": 11.364850044250488, "learning_rate": 8.294285714285715e-06, "loss": 0.5962, "step": 1100 }, { "epoch": 0.5326704545454546, "grad_norm": 13.55361557006836, "learning_rate": 8.222857142857144e-06, "loss": 0.5897, "step": 1125 }, { "epoch": 0.5445075757575758, "grad_norm": 11.306770324707031, "learning_rate": 8.151428571428572e-06, "loss": 0.5654, "step": 1150 }, { "epoch": 0.556344696969697, "grad_norm": 11.402146339416504, "learning_rate": 8.08e-06, "loss": 0.5916, "step": 1175 }, { "epoch": 0.5681818181818182, "grad_norm": 10.837797164916992, "learning_rate": 8.00857142857143e-06, "loss": 0.5345, "step": 1200 }, { "epoch": 0.5800189393939394, "grad_norm": 12.855684280395508, "learning_rate": 7.937142857142857e-06, "loss": 0.5263, "step": 1225 }, { "epoch": 0.5918560606060606, "grad_norm": 11.619951248168945, "learning_rate": 7.865714285714286e-06, "loss": 0.5461, "step": 1250 }, { "epoch": 0.6036931818181818, "grad_norm": 11.1388578414917, "learning_rate": 7.794285714285715e-06, "loss": 0.5625, "step": 1275 }, { "epoch": 0.615530303030303, "grad_norm": 11.090073585510254, "learning_rate": 7.722857142857142e-06, "loss": 0.5178, "step": 1300 }, { "epoch": 0.6273674242424242, "grad_norm": 10.146711349487305, "learning_rate": 7.651428571428571e-06, "loss": 0.5201, "step": 1325 }, { "epoch": 0.6392045454545454, "grad_norm": 16.155235290527344, "learning_rate": 7.58e-06, "loss": 0.5247, "step": 1350 }, { "epoch": 0.6510416666666666, "grad_norm": 12.52512264251709, "learning_rate": 7.508571428571429e-06, "loss": 0.5237, "step": 1375 }, { "epoch": 0.6628787878787878, "grad_norm": 9.786041259765625, "learning_rate": 7.4371428571428575e-06, "loss": 0.483, "step": 1400 }, { "epoch": 0.6747159090909091, "grad_norm": 11.963520050048828, "learning_rate": 7.365714285714286e-06, "loss": 0.5099, "step": 1425 }, { "epoch": 0.6865530303030303, "grad_norm": 11.08193302154541, "learning_rate": 7.294285714285715e-06, "loss": 0.5417, "step": 1450 }, { "epoch": 0.6983901515151515, "grad_norm": 10.529409408569336, "learning_rate": 7.222857142857144e-06, "loss": 0.4837, "step": 1475 }, { "epoch": 0.7102272727272727, "grad_norm": 10.106945037841797, "learning_rate": 7.151428571428573e-06, "loss": 0.5158, "step": 1500 }, { "epoch": 0.7220643939393939, "grad_norm": 13.151567459106445, "learning_rate": 7.08e-06, "loss": 0.5181, "step": 1525 }, { "epoch": 0.7339015151515151, "grad_norm": 13.37302303314209, "learning_rate": 7.008571428571429e-06, "loss": 0.5059, "step": 1550 }, { "epoch": 0.7457386363636364, "grad_norm": 11.584297180175781, "learning_rate": 6.937142857142858e-06, "loss": 0.5098, "step": 1575 }, { "epoch": 0.7575757575757576, "grad_norm": 9.750558853149414, "learning_rate": 6.865714285714287e-06, "loss": 0.4852, "step": 1600 }, { "epoch": 0.7694128787878788, "grad_norm": 13.944685935974121, "learning_rate": 6.794285714285714e-06, "loss": 0.5334, "step": 1625 }, { "epoch": 0.78125, "grad_norm": 11.0306396484375, "learning_rate": 6.722857142857143e-06, "loss": 0.5418, "step": 1650 }, { "epoch": 0.7930871212121212, "grad_norm": 10.839930534362793, "learning_rate": 6.651428571428572e-06, "loss": 0.4968, "step": 1675 }, { "epoch": 0.8049242424242424, "grad_norm": 10.40273380279541, "learning_rate": 6.5800000000000005e-06, "loss": 0.4605, "step": 1700 }, { "epoch": 0.8167613636363636, "grad_norm": 12.725444793701172, "learning_rate": 6.5085714285714295e-06, "loss": 0.4701, "step": 1725 }, { "epoch": 0.8285984848484849, "grad_norm": 10.695274353027344, "learning_rate": 6.437142857142858e-06, "loss": 0.464, "step": 1750 }, { "epoch": 0.8404356060606061, "grad_norm": 11.814519882202148, "learning_rate": 6.365714285714286e-06, "loss": 0.4919, "step": 1775 }, { "epoch": 0.8522727272727273, "grad_norm": 8.929429054260254, "learning_rate": 6.294285714285715e-06, "loss": 0.4433, "step": 1800 }, { "epoch": 0.8641098484848485, "grad_norm": 12.6130952835083, "learning_rate": 6.222857142857144e-06, "loss": 0.49, "step": 1825 }, { "epoch": 0.8759469696969697, "grad_norm": 9.853967666625977, "learning_rate": 6.151428571428571e-06, "loss": 0.4424, "step": 1850 }, { "epoch": 0.8877840909090909, "grad_norm": 11.585264205932617, "learning_rate": 6.08e-06, "loss": 0.457, "step": 1875 }, { "epoch": 0.8996212121212122, "grad_norm": 10.458426475524902, "learning_rate": 6.008571428571429e-06, "loss": 0.4771, "step": 1900 }, { "epoch": 0.9114583333333334, "grad_norm": 9.979692459106445, "learning_rate": 5.937142857142858e-06, "loss": 0.4393, "step": 1925 }, { "epoch": 0.9232954545454546, "grad_norm": 10.795998573303223, "learning_rate": 5.865714285714286e-06, "loss": 0.4375, "step": 1950 }, { "epoch": 0.9351325757575758, "grad_norm": 11.389334678649902, "learning_rate": 5.794285714285715e-06, "loss": 0.4289, "step": 1975 }, { "epoch": 0.946969696969697, "grad_norm": 11.161763191223145, "learning_rate": 5.722857142857144e-06, "loss": 0.4987, "step": 2000 }, { "epoch": 0.946969696969697, "eval_loss": 0.41947832703590393, "eval_runtime": 407.8391, "eval_samples_per_second": 3.957, "eval_steps_per_second": 0.495, "eval_wer": 47.85165083672546, "step": 2000 } ], "logging_steps": 25, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.8780432384e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }