{ "best_metric": 0.9433232545852661, "best_model_checkpoint": "./Hubert-noisy-cv-kakeiken/checkpoint-3463", "epoch": 29.99148014440433, "eval_steps": 500, "global_step": 103860, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1444043321299639, "grad_norm": 26.404050827026367, "learning_rate": 1.1903999999999998e-05, "loss": 7.4496, "step": 500 }, { "epoch": 0.2888086642599278, "grad_norm": 3.3450686931610107, "learning_rate": 2.3903999999999997e-05, "loss": 3.5644, "step": 1000 }, { "epoch": 0.4332129963898917, "grad_norm": 5.653954029083252, "learning_rate": 3.5903999999999994e-05, "loss": 2.4592, "step": 1500 }, { "epoch": 0.5776173285198556, "grad_norm": 11.444368362426758, "learning_rate": 4.7903999999999994e-05, "loss": 0.8948, "step": 2000 }, { "epoch": 0.7220216606498195, "grad_norm": 7.729633808135986, "learning_rate": 5.9903999999999994e-05, "loss": 0.4747, "step": 2500 }, { "epoch": 0.8664259927797834, "grad_norm": 6.9305548667907715, "learning_rate": 7.1904e-05, "loss": 0.3007, "step": 3000 }, { "epoch": 1.0, "eval_cer": 0.3277142520334787, "eval_loss": 0.9433232545852661, "eval_runtime": 160.3192, "eval_samples_per_second": 30.277, "eval_steps_per_second": 3.786, "eval_wer": 1.0, "step": 3463 }, { "epoch": 1.0106859205776173, "grad_norm": 3.508267641067505, "learning_rate": 8.390399999999999e-05, "loss": 0.2061, "step": 3500 }, { "epoch": 1.1550902527075813, "grad_norm": 3.135434627532959, "learning_rate": 9.5904e-05, "loss": 0.1695, "step": 4000 }, { "epoch": 1.2994945848375452, "grad_norm": 5.464595317840576, "learning_rate": 0.00010790399999999999, "loss": 0.1605, "step": 4500 }, { "epoch": 1.443898916967509, "grad_norm": 4.944324493408203, "learning_rate": 0.00011985599999999999, "loss": 0.1488, "step": 5000 }, { "epoch": 1.588303249097473, "grad_norm": 2.518165111541748, "learning_rate": 0.000131856, "loss": 0.143, "step": 5500 }, { "epoch": 1.7327075812274368, "grad_norm": 2.0370981693267822, "learning_rate": 0.00014385599999999999, "loss": 0.1409, "step": 6000 }, { "epoch": 1.8771119133574006, "grad_norm": 1.2558594942092896, "learning_rate": 0.00015585599999999998, "loss": 0.1409, "step": 6500 }, { "epoch": 2.0, "eval_cer": 0.36058995819693684, "eval_loss": 1.006810188293457, "eval_runtime": 160.5468, "eval_samples_per_second": 30.234, "eval_steps_per_second": 3.781, "eval_wer": 1.0, "step": 6926 }, { "epoch": 2.0213718411552346, "grad_norm": 0.9516662359237671, "learning_rate": 0.000167856, "loss": 0.1401, "step": 7000 }, { "epoch": 2.165776173285199, "grad_norm": 0.7812130451202393, "learning_rate": 0.000179856, "loss": 0.1349, "step": 7500 }, { "epoch": 2.3101805054151625, "grad_norm": 2.2682478427886963, "learning_rate": 0.000191856, "loss": 0.1403, "step": 8000 }, { "epoch": 2.4545848375451262, "grad_norm": 1.903978943824768, "learning_rate": 0.00020383199999999998, "loss": 0.1429, "step": 8500 }, { "epoch": 2.5989891696750904, "grad_norm": 2.040816068649292, "learning_rate": 0.00021583199999999998, "loss": 0.146, "step": 9000 }, { "epoch": 2.743393501805054, "grad_norm": 1.194913625717163, "learning_rate": 0.00022783199999999997, "loss": 0.1439, "step": 9500 }, { "epoch": 2.887797833935018, "grad_norm": 2.542681932449341, "learning_rate": 0.000239832, "loss": 0.1444, "step": 10000 }, { "epoch": 3.0, "eval_cer": 0.3839398253520616, "eval_loss": 1.0953569412231445, "eval_runtime": 159.8402, "eval_samples_per_second": 30.368, "eval_steps_per_second": 3.798, "eval_wer": 1.0, "step": 10389 }, { "epoch": 3.032057761732852, "grad_norm": 1.612290382385254, "learning_rate": 0.00025180799999999996, "loss": 0.1546, "step": 10500 }, { "epoch": 3.176462093862816, "grad_norm": 0.3490264117717743, "learning_rate": 0.000263808, "loss": 0.146, "step": 11000 }, { "epoch": 3.32086642599278, "grad_norm": 1.5585139989852905, "learning_rate": 0.00027580799999999995, "loss": 0.1503, "step": 11500 }, { "epoch": 3.4652707581227435, "grad_norm": 0.414385050535202, "learning_rate": 0.00028780799999999997, "loss": 0.1675, "step": 12000 }, { "epoch": 3.6096750902527077, "grad_norm": 3.544149875640869, "learning_rate": 0.00029976, "loss": 0.1659, "step": 12500 }, { "epoch": 3.7540794223826714, "grad_norm": 0.9274868369102478, "learning_rate": 0.00029997870730113894, "loss": 0.1703, "step": 13000 }, { "epoch": 3.898483754512635, "grad_norm": 0.9597361087799072, "learning_rate": 0.00029991308849456266, "loss": 0.1518, "step": 13500 }, { "epoch": 4.0, "eval_cer": 0.41253547819621145, "eval_loss": 1.2021042108535767, "eval_runtime": 160.538, "eval_samples_per_second": 30.236, "eval_steps_per_second": 3.781, "eval_wer": 1.0015622780854991, "step": 13852 }, { "epoch": 4.042743682310469, "grad_norm": 3.4597253799438477, "learning_rate": 0.00029980315410927364, "loss": 0.168, "step": 14000 }, { "epoch": 4.187148014440433, "grad_norm": 3.795635461807251, "learning_rate": 0.0002996489366428073, "loss": 0.1587, "step": 14500 }, { "epoch": 4.331552346570398, "grad_norm": 3.2676961421966553, "learning_rate": 0.00029945048168315293, "loss": 0.1576, "step": 15000 }, { "epoch": 4.475956678700361, "grad_norm": 9.203089714050293, "learning_rate": 0.00029920784789527755, "loss": 0.1734, "step": 15500 }, { "epoch": 4.620361010830325, "grad_norm": 3.076458215713501, "learning_rate": 0.000298921107003784, "loss": 0.1638, "step": 16000 }, { "epoch": 4.764765342960289, "grad_norm": 3.112588882446289, "learning_rate": 0.0002985903437717083, "loss": 0.1538, "step": 16500 }, { "epoch": 4.9091696750902525, "grad_norm": 2.951866865158081, "learning_rate": 0.00029821565597546357, "loss": 0.1691, "step": 17000 }, { "epoch": 5.0, "eval_cer": 0.4464585279155596, "eval_loss": 1.3226780891418457, "eval_runtime": 160.8675, "eval_samples_per_second": 30.174, "eval_steps_per_second": 3.773, "eval_wer": 1.0224399943189888, "step": 17315 }, { "epoch": 5.0534296028880865, "grad_norm": 1.8686376810073853, "learning_rate": 0.0002977971543759359, "loss": 0.1628, "step": 17500 }, { "epoch": 5.197833935018051, "grad_norm": 1.5772639513015747, "learning_rate": 0.00029733496268574276, "loss": 0.1503, "step": 18000 }, { "epoch": 5.342238267148014, "grad_norm": 2.1140050888061523, "learning_rate": 0.00029682921753266254, "loss": 0.1572, "step": 18500 }, { "epoch": 5.486642599277978, "grad_norm": 1.2794091701507568, "learning_rate": 0.0002962800684192461, "loss": 0.1587, "step": 19000 }, { "epoch": 5.631046931407942, "grad_norm": 2.263223886489868, "learning_rate": 0.0002956876776786226, "loss": 0.1557, "step": 19500 }, { "epoch": 5.7754512635379065, "grad_norm": 1.6803418397903442, "learning_rate": 0.00029505222042651257, "loss": 0.1491, "step": 20000 }, { "epoch": 5.91985559566787, "grad_norm": 2.0260798931121826, "learning_rate": 0.0002943738845094619, "loss": 0.1612, "step": 20500 }, { "epoch": 6.0, "eval_cer": 0.41649362072561413, "eval_loss": 1.2268037796020508, "eval_runtime": 160.8032, "eval_samples_per_second": 30.186, "eval_steps_per_second": 3.775, "eval_wer": 1.0086635421104957, "step": 20778 }, { "epoch": 6.064115523465704, "grad_norm": 3.2388651371002197, "learning_rate": 0.00029365435493064875, "loss": 0.1504, "step": 21000 }, { "epoch": 6.208519855595668, "grad_norm": 2.9128246307373047, "learning_rate": 0.0002928925295955263, "loss": 0.1598, "step": 21500 }, { "epoch": 6.352924187725632, "grad_norm": 2.8175065517425537, "learning_rate": 0.00029208863280623094, "loss": 0.1671, "step": 22000 }, { "epoch": 6.4973285198555955, "grad_norm": 25.644344329833984, "learning_rate": 0.00029124116456124535, "loss": 0.1536, "step": 22500 }, { "epoch": 6.64173285198556, "grad_norm": 1.3934019804000854, "learning_rate": 0.00029035194423171235, "loss": 0.1613, "step": 23000 }, { "epoch": 6.786137184115524, "grad_norm": 3.1552484035491943, "learning_rate": 0.00028942123467869287, "loss": 0.1786, "step": 23500 }, { "epoch": 6.930541516245487, "grad_norm": 1.4715551137924194, "learning_rate": 0.00028844931102781405, "loss": 0.155, "step": 24000 }, { "epoch": 7.0, "eval_cer": 0.4389276290136835, "eval_loss": 1.30885648727417, "eval_runtime": 167.0748, "eval_samples_per_second": 29.053, "eval_steps_per_second": 3.633, "eval_wer": 1.016048856696492, "step": 24241 }, { "epoch": 7.074801444043321, "grad_norm": 0.8942265510559082, "learning_rate": 0.000287436460587939, "loss": 0.1548, "step": 24500 }, { "epoch": 7.219205776173285, "grad_norm": 0.6779004335403442, "learning_rate": 0.0002863829827662367, "loss": 0.1539, "step": 25000 }, { "epoch": 7.3636101083032495, "grad_norm": 1.1583468914031982, "learning_rate": 0.00028528918897967396, "loss": 0.1574, "step": 25500 }, { "epoch": 7.508014440433213, "grad_norm": 1.8671414852142334, "learning_rate": 0.0002841554025629586, "loss": 0.1458, "step": 26000 }, { "epoch": 7.652418772563177, "grad_norm": 1.718024492263794, "learning_rate": 0.00028298195867295837, "loss": 0.1474, "step": 26500 }, { "epoch": 7.796823104693141, "grad_norm": 1.007827639579773, "learning_rate": 0.00028176920418962616, "loss": 0.1458, "step": 27000 }, { "epoch": 7.941227436823104, "grad_norm": 1.9642738103866577, "learning_rate": 0.0002805174976134593, "loss": 0.1529, "step": 27500 }, { "epoch": 8.0, "eval_cer": 0.42341243573119086, "eval_loss": 1.2341458797454834, "eval_runtime": 160.7088, "eval_samples_per_second": 30.204, "eval_steps_per_second": 3.777, "eval_wer": 1.0017043033659991, "step": 27704 }, { "epoch": 8.085487364620938, "grad_norm": 2.8043782711029053, "learning_rate": 0.0002792272089595235, "loss": 0.1448, "step": 28000 }, { "epoch": 8.229891696750903, "grad_norm": 3.478253126144409, "learning_rate": 0.00027789871964807356, "loss": 0.1358, "step": 28500 }, { "epoch": 8.374296028880867, "grad_norm": 2.4479691982269287, "learning_rate": 0.0002765351924520405, "loss": 0.1448, "step": 29000 }, { "epoch": 8.518700361010831, "grad_norm": 2.2376480102539062, "learning_rate": 0.0002751315655376498, "loss": 0.1413, "step": 29500 }, { "epoch": 8.663104693140795, "grad_norm": 1.8401309251785278, "learning_rate": 0.00027369094867263035, "loss": 0.1511, "step": 30000 }, { "epoch": 8.807509025270758, "grad_norm": 2.709879159927368, "learning_rate": 0.0002722137677155292, "loss": 0.1431, "step": 30500 }, { "epoch": 8.951913357400722, "grad_norm": 4.700524806976318, "learning_rate": 0.00027070045933354903, "loss": 0.1458, "step": 31000 }, { "epoch": 9.0, "eval_cer": 0.42497211617805747, "eval_loss": 1.2318909168243408, "eval_runtime": 161.5882, "eval_samples_per_second": 30.039, "eval_steps_per_second": 3.756, "eval_wer": 1.0095156937934953, "step": 31167 }, { "epoch": 9.096173285198557, "grad_norm": 1.5157949924468994, "learning_rate": 0.00026915460415649834, "loss": 0.1451, "step": 31500 }, { "epoch": 9.240577617328519, "grad_norm": 2.0823187828063965, "learning_rate": 0.00026757046349286563, "loss": 0.1408, "step": 32000 }, { "epoch": 9.384981949458483, "grad_norm": 0.6664170026779175, "learning_rate": 0.0002659515680044105, "loss": 0.136, "step": 32500 }, { "epoch": 9.529386281588447, "grad_norm": 0.9996655583381653, "learning_rate": 0.0002642983962503514, "loss": 0.1381, "step": 33000 }, { "epoch": 9.673790613718412, "grad_norm": 1.259697437286377, "learning_rate": 0.0002626114369222609, "loss": 0.1356, "step": 33500 }, { "epoch": 9.818194945848376, "grad_norm": 1.662197232246399, "learning_rate": 0.00026089118869960495, "loss": 0.1406, "step": 34000 }, { "epoch": 9.96259927797834, "grad_norm": 2.3641550540924072, "learning_rate": 0.00025913816010232854, "loss": 0.1371, "step": 34500 }, { "epoch": 10.0, "eval_cer": 0.41305234904197535, "eval_loss": 1.168934941291809, "eval_runtime": 165.3516, "eval_samples_per_second": 29.356, "eval_steps_per_second": 3.671, "eval_wer": 1.0041187331344978, "step": 34630 }, { "epoch": 10.106859205776173, "grad_norm": 1.9813348054885864, "learning_rate": 0.00025735286934053296, "loss": 0.1333, "step": 35000 }, { "epoch": 10.251263537906137, "grad_norm": 3.8109192848205566, "learning_rate": 0.0002555395095271303, "loss": 0.1302, "step": 35500 }, { "epoch": 10.395667870036101, "grad_norm": 1.4740395545959473, "learning_rate": 0.00025369134891083925, "loss": 0.1354, "step": 36000 }, { "epoch": 10.540072202166066, "grad_norm": 0.5686298608779907, "learning_rate": 0.00025181253625355146, "loss": 0.1369, "step": 36500 }, { "epoch": 10.684476534296028, "grad_norm": 0.686019241809845, "learning_rate": 0.00024990362694820174, "loss": 0.1376, "step": 37000 }, { "epoch": 10.828880866425992, "grad_norm": 2.498487710952759, "learning_rate": 0.0002479651852845492, "loss": 0.1353, "step": 37500 }, { "epoch": 10.973285198555956, "grad_norm": 1.6995295286178589, "learning_rate": 0.00024600174760064267, "loss": 0.1295, "step": 38000 }, { "epoch": 11.0, "eval_cer": 0.4175092265979924, "eval_loss": 1.2024015188217163, "eval_runtime": 161.724, "eval_samples_per_second": 30.014, "eval_steps_per_second": 3.753, "eval_wer": 1.027836954977986, "step": 38093 }, { "epoch": 11.117545126353791, "grad_norm": 1.2246454954147339, "learning_rate": 0.0002440060250098273, "loss": 0.1335, "step": 38500 }, { "epoch": 11.261949458483755, "grad_norm": 0.7367494106292725, "learning_rate": 0.0002419825134417944, "loss": 0.1274, "step": 39000 }, { "epoch": 11.406353790613718, "grad_norm": 0.7088403701782227, "learning_rate": 0.00023993593920328243, "loss": 0.1273, "step": 39500 }, { "epoch": 11.550758122743682, "grad_norm": 0.77787846326828, "learning_rate": 0.00023785870477908774, "loss": 0.1224, "step": 40000 }, { "epoch": 11.695162454873646, "grad_norm": 0.3578490614891052, "learning_rate": 0.0002357597304978587, "loss": 0.1336, "step": 40500 }, { "epoch": 11.83956678700361, "grad_norm": 1.0242489576339722, "learning_rate": 0.00023363122431926088, "loss": 0.1263, "step": 41000 }, { "epoch": 11.983971119133574, "grad_norm": 0.9205417633056641, "learning_rate": 0.0002314779960424034, "loss": 0.1347, "step": 41500 }, { "epoch": 12.0, "eval_cer": 0.41923666337199283, "eval_loss": 1.2089439630508423, "eval_runtime": 162.6658, "eval_samples_per_second": 29.84, "eval_steps_per_second": 3.732, "eval_wer": 1.014202528049993, "step": 41556 }, { "epoch": 12.128231046931408, "grad_norm": 3.0986785888671875, "learning_rate": 0.00022930068217980276, "loss": 0.1193, "step": 42000 }, { "epoch": 12.272635379061372, "grad_norm": 1.066496729850769, "learning_rate": 0.0002270999263638783, "loss": 0.1246, "step": 42500 }, { "epoch": 12.417039711191336, "grad_norm": 2.58195161819458, "learning_rate": 0.0002248763791566895, "loss": 0.1203, "step": 43000 }, { "epoch": 12.5614440433213, "grad_norm": 3.434950590133667, "learning_rate": 0.00022263069785762412, "loss": 0.1269, "step": 43500 }, { "epoch": 12.705848375451264, "grad_norm": 0.8184084296226501, "learning_rate": 0.00022036354630909492, "loss": 0.1289, "step": 44000 }, { "epoch": 12.850252707581227, "grad_norm": 1.028419017791748, "learning_rate": 0.00021807559470030224, "loss": 0.1213, "step": 44500 }, { "epoch": 12.994657039711191, "grad_norm": 2.4486939907073975, "learning_rate": 0.0002157675193691205, "loss": 0.1161, "step": 45000 }, { "epoch": 13.0, "eval_cer": 0.3997950652436094, "eval_loss": 1.1460597515106201, "eval_runtime": 166.6426, "eval_samples_per_second": 29.128, "eval_steps_per_second": 3.643, "eval_wer": 1.0370685982104815, "step": 45019 }, { "epoch": 13.138916967509026, "grad_norm": 0.7183459401130676, "learning_rate": 0.00021344000260216674, "loss": 0.1131, "step": 45500 }, { "epoch": 13.28332129963899, "grad_norm": 0.8200274109840393, "learning_rate": 0.0002110937324331113, "loss": 0.1103, "step": 46000 }, { "epoch": 13.427725631768952, "grad_norm": 1.2378402948379517, "learning_rate": 0.0002087341486593028, "loss": 0.1216, "step": 46500 }, { "epoch": 13.572129963898917, "grad_norm": 1.049865484237671, "learning_rate": 0.000206352491777539, "loss": 0.1182, "step": 47000 }, { "epoch": 13.71653429602888, "grad_norm": 0.7171204090118408, "learning_rate": 0.00020395417662190396, "loss": 0.117, "step": 47500 }, { "epoch": 13.860938628158845, "grad_norm": 0.6098294854164124, "learning_rate": 0.00020153991215467125, "loss": 0.1162, "step": 48000 }, { "epoch": 14.0, "eval_cer": 0.3919785271901269, "eval_loss": 1.1235851049423218, "eval_runtime": 162.6346, "eval_samples_per_second": 29.846, "eval_steps_per_second": 3.732, "eval_wer": 1.0311035364294845, "step": 48482 }, { "epoch": 14.005198555956678, "grad_norm": 0.4615221321582794, "learning_rate": 0.00019911041205286604, "loss": 0.1147, "step": 48500 }, { "epoch": 14.149602888086642, "grad_norm": 1.9270977973937988, "learning_rate": 0.00019666639449729612, "loss": 0.1122, "step": 49000 }, { "epoch": 14.294007220216606, "grad_norm": 0.42991870641708374, "learning_rate": 0.00019420858196025158, "loss": 0.1095, "step": 49500 }, { "epoch": 14.43841155234657, "grad_norm": 2.022409200668335, "learning_rate": 0.00019173770099193592, "loss": 0.1043, "step": 50000 }, { "epoch": 14.582815884476535, "grad_norm": 3.110955238342285, "learning_rate": 0.00018925946026968494, "loss": 0.1087, "step": 50500 }, { "epoch": 14.727220216606499, "grad_norm": 0.9901391863822937, "learning_rate": 0.00018676465979908295, "loss": 0.1125, "step": 51000 }, { "epoch": 14.871624548736461, "grad_norm": 0.500348687171936, "learning_rate": 0.00018425899138365014, "loss": 0.1107, "step": 51500 }, { "epoch": 15.0, "eval_cer": 0.3797459171737139, "eval_loss": 1.0697165727615356, "eval_runtime": 162.7821, "eval_samples_per_second": 29.819, "eval_steps_per_second": 3.729, "eval_wer": 1.0275529044169862, "step": 51945 }, { "epoch": 15.015884476534296, "grad_norm": 1.7152873277664185, "learning_rate": 0.00018174319572019298, "loss": 0.111, "step": 52000 }, { "epoch": 15.16028880866426, "grad_norm": 2.201960802078247, "learning_rate": 0.0001792180164992182, "loss": 0.1004, "step": 52500 }, { "epoch": 15.304693140794225, "grad_norm": 2.0383799076080322, "learning_rate": 0.00017668420018509165, "loss": 0.1077, "step": 53000 }, { "epoch": 15.449097472924187, "grad_norm": 1.696825623512268, "learning_rate": 0.000174142495795377, "loss": 0.0983, "step": 53500 }, { "epoch": 15.593501805054151, "grad_norm": 0.3848896920681, "learning_rate": 0.0001715936546794199, "loss": 0.108, "step": 54000 }, { "epoch": 15.737906137184115, "grad_norm": 0.11238867044448853, "learning_rate": 0.00016903843029624277, "loss": 0.1074, "step": 54500 }, { "epoch": 15.88231046931408, "grad_norm": 1.0486277341842651, "learning_rate": 0.00016648270481018474, "loss": 0.1029, "step": 55000 }, { "epoch": 16.0, "eval_cer": 0.3806164364928953, "eval_loss": 1.055138111114502, "eval_runtime": 162.2519, "eval_samples_per_second": 29.916, "eval_steps_per_second": 3.741, "eval_wer": 1.0107939213179946, "step": 55408 }, { "epoch": 16.026570397111914, "grad_norm": 1.3309388160705566, "learning_rate": 0.00016391699057947273, "loss": 0.1015, "step": 55500 }, { "epoch": 16.170974729241877, "grad_norm": 1.9488023519515991, "learning_rate": 0.00016134716236846953, "loss": 0.0997, "step": 56000 }, { "epoch": 16.315379061371843, "grad_norm": 2.3801889419555664, "learning_rate": 0.00015877397984016044, "loss": 0.0953, "step": 56500 }, { "epoch": 16.459783393501805, "grad_norm": 1.8234468698501587, "learning_rate": 0.0001561982036490956, "loss": 0.0996, "step": 57000 }, { "epoch": 16.604187725631768, "grad_norm": 1.1469004154205322, "learning_rate": 0.00015362059521653376, "loss": 0.0946, "step": 57500 }, { "epoch": 16.748592057761734, "grad_norm": 1.5313127040863037, "learning_rate": 0.00015104707442428766, "loss": 0.0971, "step": 58000 }, { "epoch": 16.892996389891696, "grad_norm": 0.8713585138320923, "learning_rate": 0.00014846808756891557, "loss": 0.0992, "step": 58500 }, { "epoch": 17.0, "eval_cer": 0.3726910835245151, "eval_loss": 1.0634260177612305, "eval_runtime": 162.7474, "eval_samples_per_second": 29.825, "eval_steps_per_second": 3.73, "eval_wer": 1.0187473370259905, "step": 58871 }, { "epoch": 17.03725631768953, "grad_norm": 0.7538420557975769, "learning_rate": 0.0001458895535598333, "loss": 0.0958, "step": 59000 }, { "epoch": 17.181660649819495, "grad_norm": 0.6020956039428711, "learning_rate": 0.00014331223463353397, "loss": 0.0888, "step": 59500 }, { "epoch": 17.326064981949457, "grad_norm": 0.2920764982700348, "learning_rate": 0.00014073689266732187, "loss": 0.0935, "step": 60000 }, { "epoch": 17.470469314079423, "grad_norm": 0.38121268153190613, "learning_rate": 0.00013816428895409522, "loss": 0.0907, "step": 60500 }, { "epoch": 17.614873646209386, "grad_norm": 1.3912694454193115, "learning_rate": 0.00013559518397730143, "loss": 0.0904, "step": 61000 }, { "epoch": 17.75927797833935, "grad_norm": 1.2576638460159302, "learning_rate": 0.0001330303371861317, "loss": 0.0885, "step": 61500 }, { "epoch": 17.903682310469314, "grad_norm": 0.5512722134590149, "learning_rate": 0.00013047050677102173, "loss": 0.0906, "step": 62000 }, { "epoch": 18.0, "eval_cer": 0.36566798755882807, "eval_loss": 1.0298676490783691, "eval_runtime": 163.3497, "eval_samples_per_second": 29.715, "eval_steps_per_second": 3.716, "eval_wer": 1.0272688538559864, "step": 62334 }, { "epoch": 18.047942238267147, "grad_norm": 1.5918681621551514, "learning_rate": 0.0001279215512905178, "loss": 0.0933, "step": 62500 }, { "epoch": 18.192346570397113, "grad_norm": 0.9083497524261475, "learning_rate": 0.00012537909630689584, "loss": 0.0883, "step": 63000 }, { "epoch": 18.336750902527076, "grad_norm": 4.478044033050537, "learning_rate": 0.00012283881759384186, "loss": 0.0823, "step": 63500 }, { "epoch": 18.481155234657038, "grad_norm": 0.6478471159934998, "learning_rate": 0.00012030656795636058, "loss": 0.0867, "step": 64000 }, { "epoch": 18.625559566787004, "grad_norm": 1.4193707704544067, "learning_rate": 0.00011778309594889268, "loss": 0.0856, "step": 64500 }, { "epoch": 18.769963898916966, "grad_norm": 1.939756155014038, "learning_rate": 0.00011526914753113733, "loss": 0.0861, "step": 65000 }, { "epoch": 18.914368231046932, "grad_norm": 0.6198760867118835, "learning_rate": 0.00011276546584753979, "loss": 0.0793, "step": 65500 }, { "epoch": 19.0, "eval_cer": 0.360209105994795, "eval_loss": 1.0216537714004517, "eval_runtime": 167.428, "eval_samples_per_second": 28.992, "eval_steps_per_second": 3.625, "eval_wer": 1.0149126544524925, "step": 65797 }, { "epoch": 19.058628158844765, "grad_norm": 0.5381482839584351, "learning_rate": 0.00011027279100761163, "loss": 0.0812, "step": 66000 }, { "epoch": 19.203032490974728, "grad_norm": 0.16126175224781036, "learning_rate": 0.00010779185986714847, "loss": 0.0765, "step": 66500 }, { "epoch": 19.347436823104694, "grad_norm": 0.8277862668037415, "learning_rate": 0.00010532832978075136, "loss": 0.0752, "step": 67000 }, { "epoch": 19.491841155234656, "grad_norm": 0.1486959308385849, "learning_rate": 0.00010287305536442495, "loss": 0.0786, "step": 67500 }, { "epoch": 19.636245487364622, "grad_norm": 0.03346061706542969, "learning_rate": 0.00010043171207209488, "loss": 0.0815, "step": 68000 }, { "epoch": 19.780649819494585, "grad_norm": 0.7169551849365234, "learning_rate": 9.800985986544723e-05, "loss": 0.0821, "step": 68500 }, { "epoch": 19.92505415162455, "grad_norm": 1.7519513368606567, "learning_rate": 9.559850808163914e-05, "loss": 0.0769, "step": 69000 }, { "epoch": 20.0, "eval_cer": 0.3532857570344308, "eval_loss": 1.0025047063827515, "eval_runtime": 163.5589, "eval_samples_per_second": 29.677, "eval_steps_per_second": 3.711, "eval_wer": 1.0333759409174834, "step": 69260 }, { "epoch": 20.069314079422384, "grad_norm": 0.6677764058113098, "learning_rate": 9.32032378396191e-05, "loss": 0.0687, "step": 69500 }, { "epoch": 20.213718411552346, "grad_norm": 0.8097858428955078, "learning_rate": 9.082475720155842e-05, "loss": 0.0683, "step": 70000 }, { "epoch": 20.358122743682312, "grad_norm": 1.7854639291763306, "learning_rate": 8.846376926647926e-05, "loss": 0.076, "step": 70500 }, { "epoch": 20.502527075812274, "grad_norm": 1.4839706420898438, "learning_rate": 8.612563894153891e-05, "loss": 0.0721, "step": 71000 }, { "epoch": 20.646931407942237, "grad_norm": 1.315382957458496, "learning_rate": 8.380168636535842e-05, "loss": 0.0747, "step": 71500 }, { "epoch": 20.791335740072203, "grad_norm": 0.8365857601165771, "learning_rate": 8.14973025713836e-05, "loss": 0.0689, "step": 72000 }, { "epoch": 20.935740072202165, "grad_norm": 1.7877798080444336, "learning_rate": 7.92131687549819e-05, "loss": 0.0727, "step": 72500 }, { "epoch": 21.0, "eval_cer": 0.3509961098667924, "eval_loss": 1.0100905895233154, "eval_runtime": 164.1891, "eval_samples_per_second": 29.563, "eval_steps_per_second": 3.697, "eval_wer": 1.0386308762959806, "step": 72723 }, { "epoch": 21.08, "grad_norm": 0.24948537349700928, "learning_rate": 7.694996012545589e-05, "loss": 0.0695, "step": 73000 }, { "epoch": 21.224404332129964, "grad_norm": 1.7968688011169434, "learning_rate": 7.47128069427435e-05, "loss": 0.0662, "step": 73500 }, { "epoch": 21.368808664259927, "grad_norm": 0.3918834924697876, "learning_rate": 7.249340420378192e-05, "loss": 0.0668, "step": 74000 }, { "epoch": 21.513212996389893, "grad_norm": 0.7001883387565613, "learning_rate": 7.029691307101347e-05, "loss": 0.0631, "step": 74500 }, { "epoch": 21.657617328519855, "grad_norm": 0.15504246950149536, "learning_rate": 6.812398284582085e-05, "loss": 0.0658, "step": 75000 }, { "epoch": 21.80202166064982, "grad_norm": 0.9391471743583679, "learning_rate": 6.597525586478348e-05, "loss": 0.067, "step": 75500 }, { "epoch": 21.946425992779783, "grad_norm": 1.4281563758850098, "learning_rate": 6.385136730979628e-05, "loss": 0.0654, "step": 76000 }, { "epoch": 22.0, "eval_cer": 0.3493865559172644, "eval_loss": 1.0315525531768799, "eval_runtime": 164.9429, "eval_samples_per_second": 29.428, "eval_steps_per_second": 3.68, "eval_wer": 1.0345121431614828, "step": 76186 }, { "epoch": 22.090685920577616, "grad_norm": 1.5562834739685059, "learning_rate": 6.175294502030551e-05, "loss": 0.0644, "step": 76500 }, { "epoch": 22.235090252707582, "grad_norm": 0.5195419788360596, "learning_rate": 5.9680609307713216e-05, "loss": 0.0628, "step": 77000 }, { "epoch": 22.379494584837545, "grad_norm": 0.22496701776981354, "learning_rate": 5.763497277200883e-05, "loss": 0.0578, "step": 77500 }, { "epoch": 22.52389891696751, "grad_norm": 1.3434886932373047, "learning_rate": 5.5620649138778015e-05, "loss": 0.0604, "step": 78000 }, { "epoch": 22.668303249097473, "grad_norm": 1.0572506189346313, "learning_rate": 5.3630160617017374e-05, "loss": 0.062, "step": 78500 }, { "epoch": 22.812707581227436, "grad_norm": 0.6325031518936157, "learning_rate": 5.166815983602437e-05, "loss": 0.0599, "step": 79000 }, { "epoch": 22.9571119133574, "grad_norm": 0.4244992434978485, "learning_rate": 4.9735226779847895e-05, "loss": 0.0605, "step": 79500 }, { "epoch": 23.0, "eval_cer": 0.3438369952574833, "eval_loss": 1.058401346206665, "eval_runtime": 164.7139, "eval_samples_per_second": 29.469, "eval_steps_per_second": 3.685, "eval_wer": 1.0254225252094873, "step": 79649 }, { "epoch": 23.101371841155235, "grad_norm": 0.7197785973548889, "learning_rate": 4.783193283987115e-05, "loss": 0.0555, "step": 80000 }, { "epoch": 23.245776173285197, "grad_norm": 0.4348308742046356, "learning_rate": 4.595884064590357e-05, "loss": 0.054, "step": 80500 }, { "epoch": 23.390180505415163, "grad_norm": 0.482464998960495, "learning_rate": 4.411650389986266e-05, "loss": 0.0578, "step": 81000 }, { "epoch": 23.534584837545125, "grad_norm": 0.0030251918360590935, "learning_rate": 4.230905769062076e-05, "loss": 0.0573, "step": 81500 }, { "epoch": 23.67898916967509, "grad_norm": 0.4511868953704834, "learning_rate": 4.052979222002046e-05, "loss": 0.0553, "step": 82000 }, { "epoch": 23.823393501805054, "grad_norm": 0.6640037298202515, "learning_rate": 3.878288707004517e-05, "loss": 0.0527, "step": 82500 }, { "epoch": 23.96779783393502, "grad_norm": 0.03944293037056923, "learning_rate": 3.706885864065343e-05, "loss": 0.0566, "step": 83000 }, { "epoch": 24.0, "eval_cer": 0.3430752908531996, "eval_loss": 1.0380102396011353, "eval_runtime": 166.927, "eval_samples_per_second": 29.079, "eval_steps_per_second": 3.636, "eval_wer": 1.047862519528476, "step": 83112 }, { "epoch": 24.112057761732853, "grad_norm": 0.03125814348459244, "learning_rate": 3.538821361316671e-05, "loss": 0.0477, "step": 83500 }, { "epoch": 24.256462093862815, "grad_norm": 1.3713135719299316, "learning_rate": 3.374144880048992e-05, "loss": 0.0506, "step": 84000 }, { "epoch": 24.40086642599278, "grad_norm": 1.8183372020721436, "learning_rate": 3.212905100024987e-05, "loss": 0.0528, "step": 84500 }, { "epoch": 24.545270758122744, "grad_norm": 0.5360317826271057, "learning_rate": 3.055149685089306e-05, "loss": 0.0524, "step": 85000 }, { "epoch": 24.689675090252706, "grad_norm": 0.5588558316230774, "learning_rate": 2.9009252690787954e-05, "loss": 0.0524, "step": 85500 }, { "epoch": 24.834079422382672, "grad_norm": 0.8383269906044006, "learning_rate": 2.7502774420370843e-05, "loss": 0.0521, "step": 86000 }, { "epoch": 24.978483754512634, "grad_norm": 0.4565883278846741, "learning_rate": 2.603541147201389e-05, "loss": 0.0507, "step": 86500 }, { "epoch": 25.0, "eval_cer": 0.3413206503504747, "eval_loss": 1.0691407918930054, "eval_runtime": 177.9397, "eval_samples_per_second": 27.279, "eval_steps_per_second": 3.411, "eval_wer": 1.0427496094304787, "step": 86575 }, { "epoch": 25.12274368231047, "grad_norm": 0.5891485214233398, "learning_rate": 2.460171654158321e-05, "loss": 0.047, "step": 87000 }, { "epoch": 25.267148014440433, "grad_norm": 0.13385725021362305, "learning_rate": 2.3205090405857905e-05, "loss": 0.0482, "step": 87500 }, { "epoch": 25.411552346570396, "grad_norm": 0.4556754529476166, "learning_rate": 2.1845945919352885e-05, "loss": 0.047, "step": 88000 }, { "epoch": 25.555956678700362, "grad_norm": 0.5063700675964355, "learning_rate": 2.052728930967913e-05, "loss": 0.0488, "step": 88500 }, { "epoch": 25.700361010830324, "grad_norm": 0.33004090189933777, "learning_rate": 1.924422531652694e-05, "loss": 0.0504, "step": 89000 }, { "epoch": 25.84476534296029, "grad_norm": 0.5845174789428711, "learning_rate": 1.7999813837863757e-05, "loss": 0.0475, "step": 89500 }, { "epoch": 25.989169675090253, "grad_norm": 0.8764269351959229, "learning_rate": 1.6794422732262768e-05, "loss": 0.0498, "step": 90000 }, { "epoch": 26.0, "eval_cer": 0.34071310040896274, "eval_loss": 1.126129150390625, "eval_runtime": 162.3463, "eval_samples_per_second": 29.899, "eval_steps_per_second": 3.739, "eval_wer": 1.03990910382048, "step": 90038 }, { "epoch": 26.133429602888086, "grad_norm": 0.6448964476585388, "learning_rate": 1.5630700823369712e-05, "loss": 0.0497, "step": 90500 }, { "epoch": 26.27783393501805, "grad_norm": 0.4461418688297272, "learning_rate": 1.450432801630148e-05, "loss": 0.0431, "step": 91000 }, { "epoch": 26.422238267148014, "grad_norm": 0.36790505051612854, "learning_rate": 1.3418008877510817e-05, "loss": 0.0466, "step": 91500 }, { "epoch": 26.56664259927798, "grad_norm": 0.364467978477478, "learning_rate": 1.2372064532136216e-05, "loss": 0.0436, "step": 92000 }, { "epoch": 26.711046931407942, "grad_norm": 0.4200996458530426, "learning_rate": 1.1366804170185262e-05, "loss": 0.0464, "step": 92500 }, { "epoch": 26.855451263537905, "grad_norm": 0.02257300540804863, "learning_rate": 1.0402524955135344e-05, "loss": 0.0487, "step": 93000 }, { "epoch": 26.99985559566787, "grad_norm": 1.0951905250549316, "learning_rate": 9.481316595070204e-06, "loss": 0.0444, "step": 93500 }, { "epoch": 27.0, "eval_cer": 0.3417332402361284, "eval_loss": 1.1671220064163208, "eval_runtime": 162.6056, "eval_samples_per_second": 29.851, "eval_steps_per_second": 3.733, "eval_wer": 1.0578042891634711, "step": 93501 }, { "epoch": 27.144115523465704, "grad_norm": 3.295487880706787, "learning_rate": 8.599759280246371e-06, "loss": 0.0437, "step": 94000 }, { "epoch": 27.288519855595666, "grad_norm": 2.211439847946167, "learning_rate": 7.760001074229832e-06, "loss": 0.0434, "step": 94500 }, { "epoch": 27.432924187725632, "grad_norm": 2.7189860343933105, "learning_rate": 6.9622902166588835e-06, "loss": 0.043, "step": 95000 }, { "epoch": 27.577328519855595, "grad_norm": 2.376002550125122, "learning_rate": 6.206862517619682e-06, "loss": 0.0429, "step": 95500 }, { "epoch": 27.72173285198556, "grad_norm": 17.600614547729492, "learning_rate": 5.493941287938408e-06, "loss": 0.0461, "step": 96000 }, { "epoch": 27.866137184115523, "grad_norm": 4.86808967590332, "learning_rate": 4.82373727316902e-06, "loss": 0.0444, "step": 96500 }, { "epoch": 28.0, "eval_cer": 0.341424931310585, "eval_loss": 1.1997830867767334, "eval_runtime": 170.8505, "eval_samples_per_second": 28.411, "eval_steps_per_second": 3.553, "eval_wer": 1.062065047578469, "step": 96964 }, { "epoch": 28.010397111913356, "grad_norm": 0.7768701314926147, "learning_rate": 4.196448591294788e-06, "loss": 0.042, "step": 97000 }, { "epoch": 28.154801444043322, "grad_norm": 0.37804001569747925, "learning_rate": 3.6122606741632364e-06, "loss": 0.0441, "step": 97500 }, { "epoch": 28.299205776173284, "grad_norm": 1.4847784042358398, "learning_rate": 3.0723847462705177e-06, "loss": 0.0428, "step": 98000 }, { "epoch": 28.44361010830325, "grad_norm": 1.5188604593276978, "learning_rate": 2.57481662155144e-06, "loss": 0.0443, "step": 98500 }, { "epoch": 28.588014440433213, "grad_norm": 0.0017864394467324018, "learning_rate": 2.1208286297203724e-06, "loss": 0.0417, "step": 99000 }, { "epoch": 28.732418772563175, "grad_norm": 0.7578967213630676, "learning_rate": 1.7105549734729718e-06, "loss": 0.0437, "step": 99500 }, { "epoch": 28.87682310469314, "grad_norm": 0.5516202449798584, "learning_rate": 1.3448059871537132e-06, "loss": 0.0439, "step": 100000 }, { "epoch": 29.0, "eval_cer": 0.34057708176534063, "eval_loss": 1.19876229763031, "eval_runtime": 163.1087, "eval_samples_per_second": 29.759, "eval_steps_per_second": 3.721, "eval_wer": 1.0568101121999716, "step": 100427 }, { "epoch": 29.021083032490974, "grad_norm": 0.4156757891178131, "learning_rate": 1.0222238976449748e-06, "loss": 0.0434, "step": 100500 }, { "epoch": 29.16548736462094, "grad_norm": 0.6885621547698975, "learning_rate": 7.43680900583804e-07, "loss": 0.0438, "step": 101000 }, { "epoch": 29.309891696750903, "grad_norm": 0.4854223430156708, "learning_rate": 5.101090358208826e-07, "loss": 0.0396, "step": 101500 }, { "epoch": 29.454296028880865, "grad_norm": 0.3836556673049927, "learning_rate": 3.197013163443396e-07, "loss": 0.043, "step": 102000 }, { "epoch": 29.59870036101083, "grad_norm": 0.2860988974571228, "learning_rate": 1.7354036094071044e-07, "loss": 0.0434, "step": 102500 }, { "epoch": 29.743104693140793, "grad_norm": 0.0012666682014241815, "learning_rate": 7.166937602638978e-08, "loss": 0.042, "step": 103000 }, { "epoch": 29.88750902527076, "grad_norm": 0.378168523311615, "learning_rate": 1.411847552734713e-08, "loss": 0.0441, "step": 103500 }, { "epoch": 29.99148014440433, "eval_cer": 0.34096246792226986, "eval_loss": 1.2041139602661133, "eval_runtime": 160.9897, "eval_samples_per_second": 30.151, "eval_steps_per_second": 3.77, "eval_wer": 1.0593665672489703, "step": 103860 }, { "epoch": 29.99148014440433, "step": 103860, "total_flos": 1.4183586251136793e+20, "train_loss": 0.1725283220464867, "train_runtime": 155289.8066, "train_samples_per_second": 21.404, "train_steps_per_second": 0.669 } ], "logging_steps": 500, "max_steps": 103860, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4183586251136793e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }