{ "best_metric": 38.50746268656716, "best_model_checkpoint": "./content/drive/MyDrive/kyrgyz_asr/checkpoint-4000", "epoch": 1.893939393939394, "eval_steps": 1000, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011837121212121212, "grad_norm": 38.770484924316406, "learning_rate": 4.4e-07, "loss": 3.1763, "step": 25 }, { "epoch": 0.023674242424242424, "grad_norm": 26.670623779296875, "learning_rate": 9.400000000000001e-07, "loss": 2.904, "step": 50 }, { "epoch": 0.03551136363636364, "grad_norm": 24.12047576904297, "learning_rate": 1.44e-06, "loss": 2.573, "step": 75 }, { "epoch": 0.04734848484848485, "grad_norm": 21.595748901367188, "learning_rate": 1.94e-06, "loss": 2.3106, "step": 100 }, { "epoch": 0.059185606060606064, "grad_norm": 18.133272171020508, "learning_rate": 2.4400000000000004e-06, "loss": 1.9787, "step": 125 }, { "epoch": 0.07102272727272728, "grad_norm": 17.44855499267578, "learning_rate": 2.9400000000000002e-06, "loss": 1.7705, "step": 150 }, { "epoch": 0.08285984848484848, "grad_norm": 16.95909309387207, "learning_rate": 3.44e-06, "loss": 1.5665, "step": 175 }, { "epoch": 0.0946969696969697, "grad_norm": 17.190784454345703, "learning_rate": 3.94e-06, "loss": 1.4962, "step": 200 }, { "epoch": 0.10653409090909091, "grad_norm": 16.988037109375, "learning_rate": 4.440000000000001e-06, "loss": 1.4281, "step": 225 }, { "epoch": 0.11837121212121213, "grad_norm": 14.77700424194336, "learning_rate": 4.94e-06, "loss": 1.3685, "step": 250 }, { "epoch": 0.13020833333333334, "grad_norm": 15.917333602905273, "learning_rate": 5.4400000000000004e-06, "loss": 1.2807, "step": 275 }, { "epoch": 0.14204545454545456, "grad_norm": 14.992497444152832, "learning_rate": 5.94e-06, "loss": 1.1803, "step": 300 }, { "epoch": 0.15388257575757575, "grad_norm": 15.551587104797363, "learning_rate": 6.440000000000001e-06, "loss": 1.148, "step": 325 }, { "epoch": 0.16571969696969696, "grad_norm": 14.418575286865234, "learning_rate": 6.9400000000000005e-06, "loss": 1.0953, "step": 350 }, { "epoch": 0.17755681818181818, "grad_norm": 15.400343894958496, "learning_rate": 7.440000000000001e-06, "loss": 1.0356, "step": 375 }, { "epoch": 0.1893939393939394, "grad_norm": 12.426461219787598, "learning_rate": 7.94e-06, "loss": 0.9864, "step": 400 }, { "epoch": 0.2012310606060606, "grad_norm": 13.831282615661621, "learning_rate": 8.44e-06, "loss": 0.9653, "step": 425 }, { "epoch": 0.21306818181818182, "grad_norm": 15.357656478881836, "learning_rate": 8.94e-06, "loss": 0.9935, "step": 450 }, { "epoch": 0.22490530303030304, "grad_norm": 13.355987548828125, "learning_rate": 9.440000000000001e-06, "loss": 0.9047, "step": 475 }, { "epoch": 0.23674242424242425, "grad_norm": 13.817172050476074, "learning_rate": 9.940000000000001e-06, "loss": 0.9242, "step": 500 }, { "epoch": 0.24857954545454544, "grad_norm": 15.737274169921875, "learning_rate": 9.937142857142858e-06, "loss": 0.8997, "step": 525 }, { "epoch": 0.2604166666666667, "grad_norm": 12.787036895751953, "learning_rate": 9.865714285714285e-06, "loss": 0.8344, "step": 550 }, { "epoch": 0.2722537878787879, "grad_norm": 13.924437522888184, "learning_rate": 9.794285714285714e-06, "loss": 0.8179, "step": 575 }, { "epoch": 0.2840909090909091, "grad_norm": 14.679474830627441, "learning_rate": 9.722857142857143e-06, "loss": 0.7755, "step": 600 }, { "epoch": 0.2959280303030303, "grad_norm": 13.893200874328613, "learning_rate": 9.651428571428572e-06, "loss": 0.7419, "step": 625 }, { 
"epoch": 0.3077651515151515, "grad_norm": 14.868252754211426, "learning_rate": 9.58e-06, "loss": 0.7708, "step": 650 }, { "epoch": 0.3196022727272727, "grad_norm": 14.574288368225098, "learning_rate": 9.508571428571429e-06, "loss": 0.7333, "step": 675 }, { "epoch": 0.3314393939393939, "grad_norm": 15.690669059753418, "learning_rate": 9.437142857142858e-06, "loss": 0.7006, "step": 700 }, { "epoch": 0.34327651515151514, "grad_norm": 12.136014938354492, "learning_rate": 9.365714285714287e-06, "loss": 0.705, "step": 725 }, { "epoch": 0.35511363636363635, "grad_norm": 13.789889335632324, "learning_rate": 9.294285714285714e-06, "loss": 0.7195, "step": 750 }, { "epoch": 0.36695075757575757, "grad_norm": 11.885604858398438, "learning_rate": 9.222857142857143e-06, "loss": 0.6753, "step": 775 }, { "epoch": 0.3787878787878788, "grad_norm": 12.935856819152832, "learning_rate": 9.151428571428572e-06, "loss": 0.6545, "step": 800 }, { "epoch": 0.390625, "grad_norm": 13.731322288513184, "learning_rate": 9.080000000000001e-06, "loss": 0.7291, "step": 825 }, { "epoch": 0.4024621212121212, "grad_norm": 13.80007553100586, "learning_rate": 9.00857142857143e-06, "loss": 0.6667, "step": 850 }, { "epoch": 0.41429924242424243, "grad_norm": 11.839824676513672, "learning_rate": 8.937142857142857e-06, "loss": 0.6221, "step": 875 }, { "epoch": 0.42613636363636365, "grad_norm": 12.550195693969727, "learning_rate": 8.865714285714287e-06, "loss": 0.6114, "step": 900 }, { "epoch": 0.43797348484848486, "grad_norm": 12.121932029724121, "learning_rate": 8.794285714285716e-06, "loss": 0.652, "step": 925 }, { "epoch": 0.4498106060606061, "grad_norm": 13.047248840332031, "learning_rate": 8.722857142857145e-06, "loss": 0.6201, "step": 950 }, { "epoch": 0.4616477272727273, "grad_norm": 12.148579597473145, "learning_rate": 8.651428571428572e-06, "loss": 0.622, "step": 975 }, { "epoch": 0.4734848484848485, "grad_norm": 14.126455307006836, "learning_rate": 8.580000000000001e-06, "loss": 0.59, "step": 1000 }, { "epoch": 0.4734848484848485, "eval_loss": 0.5916658043861389, "eval_runtime": 413.6011, "eval_samples_per_second": 3.902, "eval_steps_per_second": 0.488, "eval_wer": 60.805065581184984, "step": 1000 }, { "epoch": 0.4853219696969697, "grad_norm": 11.438024520874023, "learning_rate": 8.50857142857143e-06, "loss": 0.6024, "step": 1025 }, { "epoch": 0.4971590909090909, "grad_norm": 11.665973663330078, "learning_rate": 8.437142857142859e-06, "loss": 0.5949, "step": 1050 }, { "epoch": 0.5089962121212122, "grad_norm": 12.14466381072998, "learning_rate": 8.365714285714286e-06, "loss": 0.5825, "step": 1075 }, { "epoch": 0.5208333333333334, "grad_norm": 11.364850044250488, "learning_rate": 8.294285714285715e-06, "loss": 0.5962, "step": 1100 }, { "epoch": 0.5326704545454546, "grad_norm": 13.55361557006836, "learning_rate": 8.222857142857144e-06, "loss": 0.5897, "step": 1125 }, { "epoch": 0.5445075757575758, "grad_norm": 11.306770324707031, "learning_rate": 8.151428571428572e-06, "loss": 0.5654, "step": 1150 }, { "epoch": 0.556344696969697, "grad_norm": 11.402146339416504, "learning_rate": 8.08e-06, "loss": 0.5916, "step": 1175 }, { "epoch": 0.5681818181818182, "grad_norm": 10.837797164916992, "learning_rate": 8.00857142857143e-06, "loss": 0.5345, "step": 1200 }, { "epoch": 0.5800189393939394, "grad_norm": 12.855684280395508, "learning_rate": 7.937142857142857e-06, "loss": 0.5263, "step": 1225 }, { "epoch": 0.5918560606060606, "grad_norm": 11.619951248168945, "learning_rate": 7.865714285714286e-06, "loss": 0.5461, "step": 1250 }, { 
"epoch": 0.6036931818181818, "grad_norm": 11.1388578414917, "learning_rate": 7.794285714285715e-06, "loss": 0.5625, "step": 1275 }, { "epoch": 0.615530303030303, "grad_norm": 11.090073585510254, "learning_rate": 7.722857142857142e-06, "loss": 0.5178, "step": 1300 }, { "epoch": 0.6273674242424242, "grad_norm": 10.146711349487305, "learning_rate": 7.651428571428571e-06, "loss": 0.5201, "step": 1325 }, { "epoch": 0.6392045454545454, "grad_norm": 16.155235290527344, "learning_rate": 7.58e-06, "loss": 0.5247, "step": 1350 }, { "epoch": 0.6510416666666666, "grad_norm": 12.52512264251709, "learning_rate": 7.508571428571429e-06, "loss": 0.5237, "step": 1375 }, { "epoch": 0.6628787878787878, "grad_norm": 9.786041259765625, "learning_rate": 7.4371428571428575e-06, "loss": 0.483, "step": 1400 }, { "epoch": 0.6747159090909091, "grad_norm": 11.963520050048828, "learning_rate": 7.365714285714286e-06, "loss": 0.5099, "step": 1425 }, { "epoch": 0.6865530303030303, "grad_norm": 11.08193302154541, "learning_rate": 7.294285714285715e-06, "loss": 0.5417, "step": 1450 }, { "epoch": 0.6983901515151515, "grad_norm": 10.529409408569336, "learning_rate": 7.222857142857144e-06, "loss": 0.4837, "step": 1475 }, { "epoch": 0.7102272727272727, "grad_norm": 10.106945037841797, "learning_rate": 7.151428571428573e-06, "loss": 0.5158, "step": 1500 }, { "epoch": 0.7220643939393939, "grad_norm": 13.151567459106445, "learning_rate": 7.08e-06, "loss": 0.5181, "step": 1525 }, { "epoch": 0.7339015151515151, "grad_norm": 13.37302303314209, "learning_rate": 7.008571428571429e-06, "loss": 0.5059, "step": 1550 }, { "epoch": 0.7457386363636364, "grad_norm": 11.584297180175781, "learning_rate": 6.937142857142858e-06, "loss": 0.5098, "step": 1575 }, { "epoch": 0.7575757575757576, "grad_norm": 9.750558853149414, "learning_rate": 6.865714285714287e-06, "loss": 0.4852, "step": 1600 }, { "epoch": 0.7694128787878788, "grad_norm": 13.944685935974121, "learning_rate": 6.794285714285714e-06, "loss": 0.5334, "step": 1625 }, { "epoch": 0.78125, "grad_norm": 11.0306396484375, "learning_rate": 6.722857142857143e-06, "loss": 0.5418, "step": 1650 }, { "epoch": 0.7930871212121212, "grad_norm": 10.839930534362793, "learning_rate": 6.651428571428572e-06, "loss": 0.4968, "step": 1675 }, { "epoch": 0.8049242424242424, "grad_norm": 10.40273380279541, "learning_rate": 6.5800000000000005e-06, "loss": 0.4605, "step": 1700 }, { "epoch": 0.8167613636363636, "grad_norm": 12.725444793701172, "learning_rate": 6.5085714285714295e-06, "loss": 0.4701, "step": 1725 }, { "epoch": 0.8285984848484849, "grad_norm": 10.695274353027344, "learning_rate": 6.437142857142858e-06, "loss": 0.464, "step": 1750 }, { "epoch": 0.8404356060606061, "grad_norm": 11.814519882202148, "learning_rate": 6.365714285714286e-06, "loss": 0.4919, "step": 1775 }, { "epoch": 0.8522727272727273, "grad_norm": 8.929429054260254, "learning_rate": 6.294285714285715e-06, "loss": 0.4433, "step": 1800 }, { "epoch": 0.8641098484848485, "grad_norm": 12.6130952835083, "learning_rate": 6.222857142857144e-06, "loss": 0.49, "step": 1825 }, { "epoch": 0.8759469696969697, "grad_norm": 9.853967666625977, "learning_rate": 6.151428571428571e-06, "loss": 0.4424, "step": 1850 }, { "epoch": 0.8877840909090909, "grad_norm": 11.585264205932617, "learning_rate": 6.08e-06, "loss": 0.457, "step": 1875 }, { "epoch": 0.8996212121212122, "grad_norm": 10.458426475524902, "learning_rate": 6.008571428571429e-06, "loss": 0.4771, "step": 1900 }, { "epoch": 0.9114583333333334, "grad_norm": 9.979692459106445, "learning_rate": 
5.937142857142858e-06, "loss": 0.4393, "step": 1925 }, { "epoch": 0.9232954545454546, "grad_norm": 10.795998573303223, "learning_rate": 5.865714285714286e-06, "loss": 0.4375, "step": 1950 }, { "epoch": 0.9351325757575758, "grad_norm": 11.389334678649902, "learning_rate": 5.794285714285715e-06, "loss": 0.4289, "step": 1975 }, { "epoch": 0.946969696969697, "grad_norm": 11.161763191223145, "learning_rate": 5.722857142857144e-06, "loss": 0.4987, "step": 2000 }, { "epoch": 0.946969696969697, "eval_loss": 0.41947832703590393, "eval_runtime": 407.8391, "eval_samples_per_second": 3.957, "eval_steps_per_second": 0.495, "eval_wer": 47.85165083672546, "step": 2000 }, { "epoch": 0.9588068181818182, "grad_norm": 8.887962341308594, "learning_rate": 5.651428571428572e-06, "loss": 0.4188, "step": 2025 }, { "epoch": 0.9706439393939394, "grad_norm": 10.415024757385254, "learning_rate": 5.580000000000001e-06, "loss": 0.4313, "step": 2050 }, { "epoch": 0.9824810606060606, "grad_norm": 11.072915077209473, "learning_rate": 5.508571428571429e-06, "loss": 0.4364, "step": 2075 }, { "epoch": 0.9943181818181818, "grad_norm": 13.284914016723633, "learning_rate": 5.437142857142857e-06, "loss": 0.4868, "step": 2100 }, { "epoch": 1.006155303030303, "grad_norm": 9.159514427185059, "learning_rate": 5.365714285714286e-06, "loss": 0.3972, "step": 2125 }, { "epoch": 1.0179924242424243, "grad_norm": 9.100288391113281, "learning_rate": 5.294285714285715e-06, "loss": 0.4142, "step": 2150 }, { "epoch": 1.0298295454545454, "grad_norm": 8.876397132873535, "learning_rate": 5.2228571428571425e-06, "loss": 0.3867, "step": 2175 }, { "epoch": 1.0416666666666667, "grad_norm": 12.151432037353516, "learning_rate": 5.1514285714285715e-06, "loss": 0.3663, "step": 2200 }, { "epoch": 1.0535037878787878, "grad_norm": 9.571785926818848, "learning_rate": 5.0800000000000005e-06, "loss": 0.3979, "step": 2225 }, { "epoch": 1.0653409090909092, "grad_norm": 11.223183631896973, "learning_rate": 5.0085714285714295e-06, "loss": 0.3899, "step": 2250 }, { "epoch": 1.0771780303030303, "grad_norm": 8.434731483459473, "learning_rate": 4.937142857142858e-06, "loss": 0.3484, "step": 2275 }, { "epoch": 1.0890151515151516, "grad_norm": 10.766022682189941, "learning_rate": 4.865714285714287e-06, "loss": 0.4095, "step": 2300 }, { "epoch": 1.1008522727272727, "grad_norm": 8.537741661071777, "learning_rate": 4.794285714285715e-06, "loss": 0.3688, "step": 2325 }, { "epoch": 1.112689393939394, "grad_norm": 10.699164390563965, "learning_rate": 4.722857142857144e-06, "loss": 0.3559, "step": 2350 }, { "epoch": 1.1245265151515151, "grad_norm": 9.601336479187012, "learning_rate": 4.651428571428572e-06, "loss": 0.3711, "step": 2375 }, { "epoch": 1.1363636363636362, "grad_norm": 12.841407775878906, "learning_rate": 4.58e-06, "loss": 0.3541, "step": 2400 }, { "epoch": 1.1482007575757576, "grad_norm": 11.84900951385498, "learning_rate": 4.508571428571429e-06, "loss": 0.3548, "step": 2425 }, { "epoch": 1.160037878787879, "grad_norm": 9.47382926940918, "learning_rate": 4.437142857142857e-06, "loss": 0.3659, "step": 2450 }, { "epoch": 1.171875, "grad_norm": 11.142972946166992, "learning_rate": 4.3657142857142855e-06, "loss": 0.3473, "step": 2475 }, { "epoch": 1.183712121212121, "grad_norm": 9.656496047973633, "learning_rate": 4.2942857142857146e-06, "loss": 0.3634, "step": 2500 }, { "epoch": 1.1955492424242424, "grad_norm": 11.295799255371094, "learning_rate": 4.222857142857143e-06, "loss": 0.3573, "step": 2525 }, { "epoch": 1.2073863636363638, "grad_norm": 11.13376522064209, 
"learning_rate": 4.151428571428572e-06, "loss": 0.38, "step": 2550 }, { "epoch": 1.2192234848484849, "grad_norm": 10.365365028381348, "learning_rate": 4.08e-06, "loss": 0.3749, "step": 2575 }, { "epoch": 1.231060606060606, "grad_norm": 9.453851699829102, "learning_rate": 4.008571428571429e-06, "loss": 0.3256, "step": 2600 }, { "epoch": 1.2428977272727273, "grad_norm": 8.543546676635742, "learning_rate": 3.937142857142858e-06, "loss": 0.3744, "step": 2625 }, { "epoch": 1.2547348484848486, "grad_norm": 11.191537857055664, "learning_rate": 3.865714285714286e-06, "loss": 0.3493, "step": 2650 }, { "epoch": 1.2665719696969697, "grad_norm": 7.995920181274414, "learning_rate": 3.7942857142857147e-06, "loss": 0.3684, "step": 2675 }, { "epoch": 1.2784090909090908, "grad_norm": 10.559860229492188, "learning_rate": 3.722857142857143e-06, "loss": 0.3364, "step": 2700 }, { "epoch": 1.2902462121212122, "grad_norm": 10.414945602416992, "learning_rate": 3.651428571428572e-06, "loss": 0.3629, "step": 2725 }, { "epoch": 1.3020833333333333, "grad_norm": 8.4398775100708, "learning_rate": 3.58e-06, "loss": 0.3464, "step": 2750 }, { "epoch": 1.3139204545454546, "grad_norm": 11.154573440551758, "learning_rate": 3.508571428571429e-06, "loss": 0.3394, "step": 2775 }, { "epoch": 1.3257575757575757, "grad_norm": 9.244209289550781, "learning_rate": 3.437142857142857e-06, "loss": 0.402, "step": 2800 }, { "epoch": 1.337594696969697, "grad_norm": 9.690404891967773, "learning_rate": 3.3657142857142862e-06, "loss": 0.3552, "step": 2825 }, { "epoch": 1.3494318181818181, "grad_norm": 9.71345043182373, "learning_rate": 3.2942857142857144e-06, "loss": 0.3483, "step": 2850 }, { "epoch": 1.3612689393939394, "grad_norm": 11.922121047973633, "learning_rate": 3.222857142857143e-06, "loss": 0.3582, "step": 2875 }, { "epoch": 1.3731060606060606, "grad_norm": 9.053750038146973, "learning_rate": 3.151428571428572e-06, "loss": 0.3409, "step": 2900 }, { "epoch": 1.3849431818181819, "grad_norm": 11.320926666259766, "learning_rate": 3.08e-06, "loss": 0.3467, "step": 2925 }, { "epoch": 1.396780303030303, "grad_norm": 10.648012161254883, "learning_rate": 3.008571428571429e-06, "loss": 0.3465, "step": 2950 }, { "epoch": 1.4086174242424243, "grad_norm": 9.58975601196289, "learning_rate": 2.9371428571428573e-06, "loss": 0.3686, "step": 2975 }, { "epoch": 1.4204545454545454, "grad_norm": 10.39020824432373, "learning_rate": 2.865714285714286e-06, "loss": 0.3932, "step": 3000 }, { "epoch": 1.4204545454545454, "eval_loss": 0.35611891746520996, "eval_runtime": 410.66, "eval_samples_per_second": 3.93, "eval_steps_per_second": 0.492, "eval_wer": 42.66847580280416, "step": 3000 }, { "epoch": 1.4322916666666667, "grad_norm": 10.914188385009766, "learning_rate": 2.7942857142857145e-06, "loss": 0.3501, "step": 3025 }, { "epoch": 1.4441287878787878, "grad_norm": 8.137165069580078, "learning_rate": 2.722857142857143e-06, "loss": 0.339, "step": 3050 }, { "epoch": 1.4559659090909092, "grad_norm": 10.744149208068848, "learning_rate": 2.6514285714285713e-06, "loss": 0.3442, "step": 3075 }, { "epoch": 1.4678030303030303, "grad_norm": 10.14416790008545, "learning_rate": 2.5800000000000003e-06, "loss": 0.3455, "step": 3100 }, { "epoch": 1.4796401515151514, "grad_norm": 11.480497360229492, "learning_rate": 2.5085714285714285e-06, "loss": 0.3431, "step": 3125 }, { "epoch": 1.4914772727272727, "grad_norm": 14.735957145690918, "learning_rate": 2.4371428571428575e-06, "loss": 0.3572, "step": 3150 }, { "epoch": 1.503314393939394, "grad_norm": 10.636829376220703, 
"learning_rate": 2.365714285714286e-06, "loss": 0.3334, "step": 3175 }, { "epoch": 1.5151515151515151, "grad_norm": 8.89909553527832, "learning_rate": 2.2942857142857146e-06, "loss": 0.3294, "step": 3200 }, { "epoch": 1.5269886363636362, "grad_norm": 9.316367149353027, "learning_rate": 2.222857142857143e-06, "loss": 0.3459, "step": 3225 }, { "epoch": 1.5388257575757576, "grad_norm": 9.63460922241211, "learning_rate": 2.1514285714285714e-06, "loss": 0.3403, "step": 3250 }, { "epoch": 1.550662878787879, "grad_norm": 10.554502487182617, "learning_rate": 2.08e-06, "loss": 0.3656, "step": 3275 }, { "epoch": 1.5625, "grad_norm": 9.131353378295898, "learning_rate": 2.0085714285714286e-06, "loss": 0.3313, "step": 3300 }, { "epoch": 1.574337121212121, "grad_norm": 11.400104522705078, "learning_rate": 1.9371428571428576e-06, "loss": 0.3547, "step": 3325 }, { "epoch": 1.5861742424242424, "grad_norm": 10.741024017333984, "learning_rate": 1.865714285714286e-06, "loss": 0.3639, "step": 3350 }, { "epoch": 1.5980113636363638, "grad_norm": 7.954362869262695, "learning_rate": 1.7942857142857146e-06, "loss": 0.3232, "step": 3375 }, { "epoch": 1.6098484848484849, "grad_norm": 8.994126319885254, "learning_rate": 1.7228571428571432e-06, "loss": 0.3328, "step": 3400 }, { "epoch": 1.621685606060606, "grad_norm": 9.671642303466797, "learning_rate": 1.6514285714285715e-06, "loss": 0.3249, "step": 3425 }, { "epoch": 1.6335227272727273, "grad_norm": 12.057549476623535, "learning_rate": 1.5800000000000001e-06, "loss": 0.3126, "step": 3450 }, { "epoch": 1.6453598484848486, "grad_norm": 8.958949089050293, "learning_rate": 1.5085714285714287e-06, "loss": 0.3185, "step": 3475 }, { "epoch": 1.6571969696969697, "grad_norm": 10.565046310424805, "learning_rate": 1.4371428571428573e-06, "loss": 0.3424, "step": 3500 }, { "epoch": 1.6690340909090908, "grad_norm": 9.329957962036133, "learning_rate": 1.3657142857142857e-06, "loss": 0.337, "step": 3525 }, { "epoch": 1.6808712121212122, "grad_norm": 8.723917007446289, "learning_rate": 1.2942857142857143e-06, "loss": 0.3487, "step": 3550 }, { "epoch": 1.6927083333333335, "grad_norm": 9.308070182800293, "learning_rate": 1.222857142857143e-06, "loss": 0.3356, "step": 3575 }, { "epoch": 1.7045454545454546, "grad_norm": 8.200910568237305, "learning_rate": 1.1514285714285714e-06, "loss": 0.3008, "step": 3600 }, { "epoch": 1.7163825757575757, "grad_norm": 12.48343276977539, "learning_rate": 1.08e-06, "loss": 0.3621, "step": 3625 }, { "epoch": 1.728219696969697, "grad_norm": 9.072750091552734, "learning_rate": 1.0085714285714286e-06, "loss": 0.2966, "step": 3650 }, { "epoch": 1.7400568181818183, "grad_norm": 9.534562110900879, "learning_rate": 9.371428571428571e-07, "loss": 0.3444, "step": 3675 }, { "epoch": 1.7518939393939394, "grad_norm": 8.556685447692871, "learning_rate": 8.657142857142858e-07, "loss": 0.323, "step": 3700 }, { "epoch": 1.7637310606060606, "grad_norm": 9.425592422485352, "learning_rate": 7.942857142857144e-07, "loss": 0.3433, "step": 3725 }, { "epoch": 1.7755681818181817, "grad_norm": 10.603422164916992, "learning_rate": 7.228571428571429e-07, "loss": 0.3376, "step": 3750 }, { "epoch": 1.787405303030303, "grad_norm": 8.562864303588867, "learning_rate": 6.514285714285715e-07, "loss": 0.3175, "step": 3775 }, { "epoch": 1.7992424242424243, "grad_norm": 10.139692306518555, "learning_rate": 5.800000000000001e-07, "loss": 0.3453, "step": 3800 }, { "epoch": 1.8110795454545454, "grad_norm": 10.980659484863281, "learning_rate": 5.085714285714286e-07, "loss": 0.3448, "step": 3825 
}, { "epoch": 1.8229166666666665, "grad_norm": 9.530597686767578, "learning_rate": 4.371428571428572e-07, "loss": 0.3303, "step": 3850 }, { "epoch": 1.8347537878787878, "grad_norm": 7.428104877471924, "learning_rate": 3.657142857142858e-07, "loss": 0.3111, "step": 3875 }, { "epoch": 1.8465909090909092, "grad_norm": 9.596365928649902, "learning_rate": 2.942857142857143e-07, "loss": 0.3421, "step": 3900 }, { "epoch": 1.8584280303030303, "grad_norm": 10.390746116638184, "learning_rate": 2.228571428571429e-07, "loss": 0.3443, "step": 3925 }, { "epoch": 1.8702651515151514, "grad_norm": 10.99935245513916, "learning_rate": 1.5142857142857144e-07, "loss": 0.3591, "step": 3950 }, { "epoch": 1.8821022727272727, "grad_norm": 11.292437553405762, "learning_rate": 8e-08, "loss": 0.3492, "step": 3975 }, { "epoch": 1.893939393939394, "grad_norm": 10.099928855895996, "learning_rate": 8.571428571428572e-09, "loss": 0.3441, "step": 4000 }, { "epoch": 1.893939393939394, "eval_loss": 0.33238619565963745, "eval_runtime": 435.0715, "eval_samples_per_second": 3.71, "eval_steps_per_second": 0.464, "eval_wer": 38.50746268656716, "step": 4000 } ], "logging_steps": 25, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5752393644032e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }