{ "best_metric": 0.6916529645033369, "best_model_checkpoint": "d:\\DataTicon\\Whisper-Khmer-Small\\whisper-khmer\\outputs\\whisper-khmer-tiny\\checkpoint-3000", "epoch": 2.7447392497712717, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009149130832570906, "grad_norm": 76.3263931274414, "learning_rate": 5e-06, "loss": 3.7253, "step": 10 }, { "epoch": 0.018298261665141813, "grad_norm": 23.748031616210938, "learning_rate": 1.5e-05, "loss": 2.8873, "step": 20 }, { "epoch": 0.027447392497712716, "grad_norm": 17.664098739624023, "learning_rate": 2.5e-05, "loss": 2.2375, "step": 30 }, { "epoch": 0.036596523330283626, "grad_norm": 12.52354907989502, "learning_rate": 3.5e-05, "loss": 1.8201, "step": 40 }, { "epoch": 0.04574565416285453, "grad_norm": 27.447662353515625, "learning_rate": 4.5e-05, "loss": 1.6313, "step": 50 }, { "epoch": 0.05489478499542543, "grad_norm": 24.413909912109375, "learning_rate": 4.992257664911737e-05, "loss": 1.6002, "step": 60 }, { "epoch": 0.06404391582799634, "grad_norm": 21.101381301879883, "learning_rate": 4.976772994735213e-05, "loss": 1.5404, "step": 70 }, { "epoch": 0.07319304666056725, "grad_norm": 14.289061546325684, "learning_rate": 4.961288324558687e-05, "loss": 1.4752, "step": 80 }, { "epoch": 0.08234217749313816, "grad_norm": 14.361418724060059, "learning_rate": 4.945803654382162e-05, "loss": 1.4342, "step": 90 }, { "epoch": 0.09149130832570906, "grad_norm": 10.036293983459473, "learning_rate": 4.930318984205637e-05, "loss": 1.3983, "step": 100 }, { "epoch": 0.10064043915827996, "grad_norm": 14.588384628295898, "learning_rate": 4.9148343140291115e-05, "loss": 1.3811, "step": 110 }, { "epoch": 0.10978956999085086, "grad_norm": 11.769558906555176, "learning_rate": 4.899349643852586e-05, "loss": 1.3395, "step": 120 }, { "epoch": 0.11893870082342177, "grad_norm": 15.153656959533691, "learning_rate": 4.883864973676061e-05, "loss": 1.3043, "step": 130 }, { "epoch": 0.1280878316559927, "grad_norm": 10.691374778747559, "learning_rate": 4.8683803034995354e-05, "loss": 1.2971, "step": 140 }, { "epoch": 0.1372369624885636, "grad_norm": 18.516103744506836, "learning_rate": 4.85289563332301e-05, "loss": 1.2716, "step": 150 }, { "epoch": 0.1463860933211345, "grad_norm": 10.616347312927246, "learning_rate": 4.837410963146485e-05, "loss": 1.2456, "step": 160 }, { "epoch": 0.1555352241537054, "grad_norm": 14.87991714477539, "learning_rate": 4.82192629296996e-05, "loss": 1.2224, "step": 170 }, { "epoch": 0.16468435498627632, "grad_norm": 19.040470123291016, "learning_rate": 4.806441622793435e-05, "loss": 1.2364, "step": 180 }, { "epoch": 0.17383348581884722, "grad_norm": 18.312976837158203, "learning_rate": 4.7909569526169096e-05, "loss": 1.1906, "step": 190 }, { "epoch": 0.18298261665141813, "grad_norm": 21.12237548828125, "learning_rate": 4.7754722824403844e-05, "loss": 1.1894, "step": 200 }, { "epoch": 0.19213174748398903, "grad_norm": 8.743210792541504, "learning_rate": 4.759987612263859e-05, "loss": 1.1648, "step": 210 }, { "epoch": 0.2012808783165599, "grad_norm": 11.74830150604248, "learning_rate": 4.744502942087334e-05, "loss": 1.1461, "step": 220 }, { "epoch": 0.21043000914913082, "grad_norm": 15.216257095336914, "learning_rate": 4.729018271910808e-05, "loss": 1.1189, "step": 230 }, { "epoch": 0.21957913998170173, "grad_norm": 17.434612274169922, "learning_rate": 4.713533601734284e-05, "loss": 1.126, "step": 240 }, { "epoch": 0.22872827081427263, "grad_norm": 11.15873908996582, "learning_rate": 4.698048931557758e-05, "loss": 1.0959, "step": 250 }, { "epoch": 0.23787740164684354, "grad_norm": 13.587617874145508, "learning_rate": 4.682564261381233e-05, "loss": 1.0969, "step": 260 }, { "epoch": 0.24702653247941445, "grad_norm": 11.1122407913208, "learning_rate": 4.6670795912047076e-05, "loss": 1.0839, "step": 270 }, { "epoch": 0.2561756633119854, "grad_norm": 16.767852783203125, "learning_rate": 4.6515949210281825e-05, "loss": 1.053, "step": 280 }, { "epoch": 0.2653247941445563, "grad_norm": 14.137765884399414, "learning_rate": 4.6361102508516566e-05, "loss": 1.0309, "step": 290 }, { "epoch": 0.2744739249771272, "grad_norm": 13.212748527526855, "learning_rate": 4.620625580675132e-05, "loss": 0.9809, "step": 300 }, { "epoch": 0.2836230558096981, "grad_norm": 12.678833961486816, "learning_rate": 4.605140910498606e-05, "loss": 0.9719, "step": 310 }, { "epoch": 0.292772186642269, "grad_norm": 11.58700180053711, "learning_rate": 4.589656240322082e-05, "loss": 0.9258, "step": 320 }, { "epoch": 0.3019213174748399, "grad_norm": 10.919293403625488, "learning_rate": 4.574171570145556e-05, "loss": 0.8796, "step": 330 }, { "epoch": 0.3110704483074108, "grad_norm": 10.442853927612305, "learning_rate": 4.558686899969031e-05, "loss": 0.8912, "step": 340 }, { "epoch": 0.3202195791399817, "grad_norm": 11.729557991027832, "learning_rate": 4.543202229792506e-05, "loss": 0.8275, "step": 350 }, { "epoch": 0.32936870997255263, "grad_norm": 9.217303276062012, "learning_rate": 4.5277175596159805e-05, "loss": 0.7889, "step": 360 }, { "epoch": 0.33851784080512354, "grad_norm": 13.999395370483398, "learning_rate": 4.512232889439455e-05, "loss": 0.736, "step": 370 }, { "epoch": 0.34766697163769444, "grad_norm": 18.503355026245117, "learning_rate": 4.49674821926293e-05, "loss": 0.7104, "step": 380 }, { "epoch": 0.35681610247026535, "grad_norm": 11.04101848602295, "learning_rate": 4.4812635490864044e-05, "loss": 0.7163, "step": 390 }, { "epoch": 0.36596523330283626, "grad_norm": 9.643781661987305, "learning_rate": 4.465778878909879e-05, "loss": 0.6944, "step": 400 }, { "epoch": 0.37511436413540716, "grad_norm": 8.121737480163574, "learning_rate": 4.450294208733354e-05, "loss": 0.6771, "step": 410 }, { "epoch": 0.38426349496797807, "grad_norm": 12.704200744628906, "learning_rate": 4.434809538556829e-05, "loss": 0.6185, "step": 420 }, { "epoch": 0.3934126258005489, "grad_norm": 8.623883247375488, "learning_rate": 4.419324868380304e-05, "loss": 0.6471, "step": 430 }, { "epoch": 0.4025617566331198, "grad_norm": 9.99401569366455, "learning_rate": 4.4038401982037786e-05, "loss": 0.6112, "step": 440 }, { "epoch": 0.41171088746569073, "grad_norm": 9.302281379699707, "learning_rate": 4.3883555280272534e-05, "loss": 0.6054, "step": 450 }, { "epoch": 0.42086001829826164, "grad_norm": 12.407485961914062, "learning_rate": 4.372870857850728e-05, "loss": 0.6045, "step": 460 }, { "epoch": 0.43000914913083255, "grad_norm": 11.521641731262207, "learning_rate": 4.357386187674203e-05, "loss": 0.5927, "step": 470 }, { "epoch": 0.43915827996340345, "grad_norm": 18.213590621948242, "learning_rate": 4.341901517497677e-05, "loss": 0.5654, "step": 480 }, { "epoch": 0.44830741079597436, "grad_norm": 9.964733123779297, "learning_rate": 4.326416847321153e-05, "loss": 0.5561, "step": 490 }, { "epoch": 0.45745654162854527, "grad_norm": 10.646913528442383, "learning_rate": 4.310932177144627e-05, "loss": 0.5174, "step": 500 }, { "epoch": 0.45745654162854527, "eval_loss": 0.5902902483940125, "eval_runtime": 436.5208, "eval_samples_per_second": 1.766, "eval_steps_per_second": 0.112, "eval_wer": 0.9019247509430313, "step": 500 }, { "epoch": 0.46660567246111617, "grad_norm": 7.100905895233154, "learning_rate": 4.295447506968102e-05, "loss": 0.5449, "step": 510 }, { "epoch": 0.4757548032936871, "grad_norm": 14.554773330688477, "learning_rate": 4.2799628367915767e-05, "loss": 0.5277, "step": 520 }, { "epoch": 0.484903934126258, "grad_norm": 12.082781791687012, "learning_rate": 4.2644781666150515e-05, "loss": 0.5299, "step": 530 }, { "epoch": 0.4940530649588289, "grad_norm": 8.607912063598633, "learning_rate": 4.2489934964385257e-05, "loss": 0.5395, "step": 540 }, { "epoch": 0.5032021957913998, "grad_norm": 9.982528686523438, "learning_rate": 4.233508826262001e-05, "loss": 0.5197, "step": 550 }, { "epoch": 0.5123513266239708, "grad_norm": 12.866645812988281, "learning_rate": 4.2180241560854753e-05, "loss": 0.4857, "step": 560 }, { "epoch": 0.5215004574565416, "grad_norm": 9.12654972076416, "learning_rate": 4.20253948590895e-05, "loss": 0.4852, "step": 570 }, { "epoch": 0.5306495882891126, "grad_norm": 7.0818705558776855, "learning_rate": 4.187054815732425e-05, "loss": 0.5013, "step": 580 }, { "epoch": 0.5397987191216834, "grad_norm": 9.520069122314453, "learning_rate": 4.1715701455559e-05, "loss": 0.4656, "step": 590 }, { "epoch": 0.5489478499542544, "grad_norm": 8.271717071533203, "learning_rate": 4.156085475379375e-05, "loss": 0.4866, "step": 600 }, { "epoch": 0.5580969807868252, "grad_norm": 9.679398536682129, "learning_rate": 4.1406008052028496e-05, "loss": 0.5045, "step": 610 }, { "epoch": 0.5672461116193962, "grad_norm": 9.209792137145996, "learning_rate": 4.125116135026324e-05, "loss": 0.435, "step": 620 }, { "epoch": 0.576395242451967, "grad_norm": 7.2256669998168945, "learning_rate": 4.109631464849799e-05, "loss": 0.4575, "step": 630 }, { "epoch": 0.585544373284538, "grad_norm": 7.8047990798950195, "learning_rate": 4.0941467946732734e-05, "loss": 0.4598, "step": 640 }, { "epoch": 0.5946935041171089, "grad_norm": 7.035597801208496, "learning_rate": 4.078662124496748e-05, "loss": 0.4714, "step": 650 }, { "epoch": 0.6038426349496798, "grad_norm": 7.996973514556885, "learning_rate": 4.063177454320223e-05, "loss": 0.4596, "step": 660 }, { "epoch": 0.6129917657822507, "grad_norm": 6.872828960418701, "learning_rate": 4.047692784143698e-05, "loss": 0.4106, "step": 670 }, { "epoch": 0.6221408966148216, "grad_norm": 6.923854827880859, "learning_rate": 4.032208113967173e-05, "loss": 0.42, "step": 680 }, { "epoch": 0.6312900274473925, "grad_norm": 12.70057487487793, "learning_rate": 4.0167234437906476e-05, "loss": 0.4229, "step": 690 }, { "epoch": 0.6404391582799634, "grad_norm": 6.876515865325928, "learning_rate": 4.0012387736141224e-05, "loss": 0.4114, "step": 700 }, { "epoch": 0.6495882891125343, "grad_norm": 7.49954891204834, "learning_rate": 3.9857541034375966e-05, "loss": 0.4163, "step": 710 }, { "epoch": 0.6587374199451053, "grad_norm": 6.375706672668457, "learning_rate": 3.970269433261072e-05, "loss": 0.4067, "step": 720 }, { "epoch": 0.6678865507776761, "grad_norm": 5.803896427154541, "learning_rate": 3.954784763084546e-05, "loss": 0.4269, "step": 730 }, { "epoch": 0.6770356816102471, "grad_norm": 9.036760330200195, "learning_rate": 3.939300092908022e-05, "loss": 0.4117, "step": 740 }, { "epoch": 0.6861848124428179, "grad_norm": 6.481241226196289, "learning_rate": 3.923815422731496e-05, "loss": 0.4561, "step": 750 }, { "epoch": 0.6953339432753889, "grad_norm": 7.707711219787598, "learning_rate": 3.908330752554971e-05, "loss": 0.4075, "step": 760 }, { "epoch": 0.7044830741079597, "grad_norm": 6.894267559051514, "learning_rate": 3.892846082378446e-05, "loss": 0.4036, "step": 770 }, { "epoch": 0.7136322049405307, "grad_norm": 6.747013568878174, "learning_rate": 3.8773614122019205e-05, "loss": 0.4139, "step": 780 }, { "epoch": 0.7227813357731016, "grad_norm": 8.749561309814453, "learning_rate": 3.861876742025395e-05, "loss": 0.391, "step": 790 }, { "epoch": 0.7319304666056725, "grad_norm": 6.197606086730957, "learning_rate": 3.84639207184887e-05, "loss": 0.4115, "step": 800 }, { "epoch": 0.7410795974382434, "grad_norm": 6.012449264526367, "learning_rate": 3.8309074016723444e-05, "loss": 0.43, "step": 810 }, { "epoch": 0.7502287282708143, "grad_norm": 9.235795021057129, "learning_rate": 3.815422731495819e-05, "loss": 0.4013, "step": 820 }, { "epoch": 0.7593778591033852, "grad_norm": 6.508467197418213, "learning_rate": 3.799938061319294e-05, "loss": 0.4084, "step": 830 }, { "epoch": 0.7685269899359561, "grad_norm": 12.164517402648926, "learning_rate": 3.784453391142769e-05, "loss": 0.422, "step": 840 }, { "epoch": 0.777676120768527, "grad_norm": 6.47005033493042, "learning_rate": 3.768968720966244e-05, "loss": 0.3806, "step": 850 }, { "epoch": 0.7868252516010978, "grad_norm": 6.4245476722717285, "learning_rate": 3.7534840507897186e-05, "loss": 0.3772, "step": 860 }, { "epoch": 0.7959743824336688, "grad_norm": 6.941617965698242, "learning_rate": 3.737999380613193e-05, "loss": 0.3621, "step": 870 }, { "epoch": 0.8051235132662397, "grad_norm": 6.679232120513916, "learning_rate": 3.722514710436668e-05, "loss": 0.3699, "step": 880 }, { "epoch": 0.8142726440988106, "grad_norm": 7.287721157073975, "learning_rate": 3.7070300402601424e-05, "loss": 0.3728, "step": 890 }, { "epoch": 0.8234217749313815, "grad_norm": 7.297004699707031, "learning_rate": 3.691545370083617e-05, "loss": 0.3823, "step": 900 }, { "epoch": 0.8325709057639524, "grad_norm": 5.730973720550537, "learning_rate": 3.676060699907092e-05, "loss": 0.3716, "step": 910 }, { "epoch": 0.8417200365965233, "grad_norm": 8.157340049743652, "learning_rate": 3.660576029730567e-05, "loss": 0.3731, "step": 920 }, { "epoch": 0.8508691674290942, "grad_norm": 8.863473892211914, "learning_rate": 3.645091359554042e-05, "loss": 0.3445, "step": 930 }, { "epoch": 0.8600182982616651, "grad_norm": 5.911675453186035, "learning_rate": 3.6296066893775166e-05, "loss": 0.3671, "step": 940 }, { "epoch": 0.869167429094236, "grad_norm": 6.246954441070557, "learning_rate": 3.6141220192009915e-05, "loss": 0.3876, "step": 950 }, { "epoch": 0.8783165599268069, "grad_norm": 4.594511032104492, "learning_rate": 3.5986373490244656e-05, "loss": 0.3637, "step": 960 }, { "epoch": 0.8874656907593779, "grad_norm": 7.323066234588623, "learning_rate": 3.583152678847941e-05, "loss": 0.3624, "step": 970 }, { "epoch": 0.8966148215919487, "grad_norm": 6.408933639526367, "learning_rate": 3.567668008671415e-05, "loss": 0.3496, "step": 980 }, { "epoch": 0.9057639524245197, "grad_norm": 5.430429935455322, "learning_rate": 3.552183338494891e-05, "loss": 0.368, "step": 990 }, { "epoch": 0.9149130832570905, "grad_norm": 7.088529109954834, "learning_rate": 3.536698668318365e-05, "loss": 0.3655, "step": 1000 }, { "epoch": 0.9149130832570905, "eval_loss": 0.3907645046710968, "eval_runtime": 424.8249, "eval_samples_per_second": 1.815, "eval_steps_per_second": 0.115, "eval_wer": 0.8130380114130961, "step": 1000 }, { "epoch": 0.9240622140896615, "grad_norm": 5.1332292556762695, "learning_rate": 3.52121399814184e-05, "loss": 0.3153, "step": 1010 }, { "epoch": 0.9332113449222323, "grad_norm": 5.477539539337158, "learning_rate": 3.505729327965315e-05, "loss": 0.3383, "step": 1020 }, { "epoch": 0.9423604757548033, "grad_norm": 6.7095866203308105, "learning_rate": 3.4902446577887895e-05, "loss": 0.3351, "step": 1030 }, { "epoch": 0.9515096065873742, "grad_norm": 4.704165935516357, "learning_rate": 3.474759987612264e-05, "loss": 0.3144, "step": 1040 }, { "epoch": 0.9606587374199451, "grad_norm": 9.83104419708252, "learning_rate": 3.459275317435739e-05, "loss": 0.3667, "step": 1050 }, { "epoch": 0.969807868252516, "grad_norm": 6.172043323516846, "learning_rate": 3.4437906472592134e-05, "loss": 0.3298, "step": 1060 }, { "epoch": 0.9789569990850869, "grad_norm": 6.027336597442627, "learning_rate": 3.428305977082688e-05, "loss": 0.3269, "step": 1070 }, { "epoch": 0.9881061299176578, "grad_norm": 6.435912132263184, "learning_rate": 3.412821306906163e-05, "loss": 0.3203, "step": 1080 }, { "epoch": 0.9972552607502287, "grad_norm": 7.0265913009643555, "learning_rate": 3.397336636729638e-05, "loss": 0.3413, "step": 1090 }, { "epoch": 1.0064043915827996, "grad_norm": 6.904513835906982, "learning_rate": 3.381851966553112e-05, "loss": 0.3164, "step": 1100 }, { "epoch": 1.0155535224153704, "grad_norm": 5.236996173858643, "learning_rate": 3.3663672963765876e-05, "loss": 0.2675, "step": 1110 }, { "epoch": 1.0247026532479415, "grad_norm": 6.988259792327881, "learning_rate": 3.350882626200062e-05, "loss": 0.3134, "step": 1120 }, { "epoch": 1.0338517840805124, "grad_norm": 8.87595272064209, "learning_rate": 3.335397956023537e-05, "loss": 0.3065, "step": 1130 }, { "epoch": 1.0430009149130832, "grad_norm": 7.2589287757873535, "learning_rate": 3.3199132858470114e-05, "loss": 0.2798, "step": 1140 }, { "epoch": 1.052150045745654, "grad_norm": 7.233737945556641, "learning_rate": 3.304428615670486e-05, "loss": 0.2954, "step": 1150 }, { "epoch": 1.0612991765782251, "grad_norm": 4.9386887550354, "learning_rate": 3.288943945493961e-05, "loss": 0.2959, "step": 1160 }, { "epoch": 1.070448307410796, "grad_norm": 6.335395812988281, "learning_rate": 3.273459275317436e-05, "loss": 0.295, "step": 1170 }, { "epoch": 1.0795974382433668, "grad_norm": 6.33104944229126, "learning_rate": 3.257974605140911e-05, "loss": 0.2997, "step": 1180 }, { "epoch": 1.0887465690759377, "grad_norm": 5.694860458374023, "learning_rate": 3.2424899349643856e-05, "loss": 0.262, "step": 1190 }, { "epoch": 1.0978956999085088, "grad_norm": 5.777647018432617, "learning_rate": 3.2270052647878605e-05, "loss": 0.2822, "step": 1200 }, { "epoch": 1.1070448307410796, "grad_norm": 8.250167846679688, "learning_rate": 3.2115205946113346e-05, "loss": 0.2778, "step": 1210 }, { "epoch": 1.1161939615736505, "grad_norm": 4.275432586669922, "learning_rate": 3.19603592443481e-05, "loss": 0.2793, "step": 1220 }, { "epoch": 1.1253430924062213, "grad_norm": 4.831576824188232, "learning_rate": 3.180551254258284e-05, "loss": 0.2815, "step": 1230 }, { "epoch": 1.1344922232387924, "grad_norm": 5.6868720054626465, "learning_rate": 3.165066584081759e-05, "loss": 0.3085, "step": 1240 }, { "epoch": 1.1436413540713632, "grad_norm": 5.878891944885254, "learning_rate": 3.149581913905234e-05, "loss": 0.2543, "step": 1250 }, { "epoch": 1.152790484903934, "grad_norm": 5.85615348815918, "learning_rate": 3.134097243728709e-05, "loss": 0.2538, "step": 1260 }, { "epoch": 1.161939615736505, "grad_norm": 4.6179118156433105, "learning_rate": 3.118612573552184e-05, "loss": 0.2565, "step": 1270 }, { "epoch": 1.171088746569076, "grad_norm": 5.257189750671387, "learning_rate": 3.1031279033756585e-05, "loss": 0.2681, "step": 1280 }, { "epoch": 1.1802378774016469, "grad_norm": 4.76942253112793, "learning_rate": 3.087643233199133e-05, "loss": 0.2826, "step": 1290 }, { "epoch": 1.1893870082342177, "grad_norm": 5.82953405380249, "learning_rate": 3.072158563022608e-05, "loss": 0.2826, "step": 1300 }, { "epoch": 1.1985361390667886, "grad_norm": 5.2305731773376465, "learning_rate": 3.0566738928460824e-05, "loss": 0.2598, "step": 1310 }, { "epoch": 1.2076852698993596, "grad_norm": 5.51474666595459, "learning_rate": 3.0411892226695572e-05, "loss": 0.2685, "step": 1320 }, { "epoch": 1.2168344007319305, "grad_norm": 7.23142147064209, "learning_rate": 3.025704552493032e-05, "loss": 0.285, "step": 1330 }, { "epoch": 1.2259835315645013, "grad_norm": 5.186690807342529, "learning_rate": 3.010219882316507e-05, "loss": 0.2872, "step": 1340 }, { "epoch": 1.2351326623970722, "grad_norm": 5.723147392272949, "learning_rate": 2.9947352121399814e-05, "loss": 0.2631, "step": 1350 }, { "epoch": 1.2442817932296433, "grad_norm": 4.612165451049805, "learning_rate": 2.9792505419634566e-05, "loss": 0.2966, "step": 1360 }, { "epoch": 1.2534309240622141, "grad_norm": 5.467476844787598, "learning_rate": 2.963765871786931e-05, "loss": 0.2308, "step": 1370 }, { "epoch": 1.262580054894785, "grad_norm": 4.7134785652160645, "learning_rate": 2.9482812016104063e-05, "loss": 0.2781, "step": 1380 }, { "epoch": 1.2717291857273558, "grad_norm": 4.138732433319092, "learning_rate": 2.9327965314338808e-05, "loss": 0.2452, "step": 1390 }, { "epoch": 1.2808783165599267, "grad_norm": 4.39865255355835, "learning_rate": 2.9173118612573553e-05, "loss": 0.2759, "step": 1400 }, { "epoch": 1.2900274473924978, "grad_norm": 6.269981384277344, "learning_rate": 2.9018271910808305e-05, "loss": 0.2802, "step": 1410 }, { "epoch": 1.2991765782250686, "grad_norm": 5.472837924957275, "learning_rate": 2.886342520904305e-05, "loss": 0.282, "step": 1420 }, { "epoch": 1.3083257090576395, "grad_norm": 5.290619850158691, "learning_rate": 2.8708578507277795e-05, "loss": 0.2443, "step": 1430 }, { "epoch": 1.3174748398902105, "grad_norm": 4.903107643127441, "learning_rate": 2.8553731805512546e-05, "loss": 0.255, "step": 1440 }, { "epoch": 1.3266239707227814, "grad_norm": 5.144070625305176, "learning_rate": 2.839888510374729e-05, "loss": 0.2375, "step": 1450 }, { "epoch": 1.3357731015553522, "grad_norm": 4.945043087005615, "learning_rate": 2.8244038401982036e-05, "loss": 0.2381, "step": 1460 }, { "epoch": 1.344922232387923, "grad_norm": 5.670736789703369, "learning_rate": 2.8089191700216788e-05, "loss": 0.2398, "step": 1470 }, { "epoch": 1.354071363220494, "grad_norm": 5.526036739349365, "learning_rate": 2.7934344998451533e-05, "loss": 0.2748, "step": 1480 }, { "epoch": 1.363220494053065, "grad_norm": 4.805148601531982, "learning_rate": 2.7779498296686278e-05, "loss": 0.2412, "step": 1490 }, { "epoch": 1.3723696248856359, "grad_norm": 4.122767925262451, "learning_rate": 2.762465159492103e-05, "loss": 0.2805, "step": 1500 }, { "epoch": 1.3723696248856359, "eval_loss": 0.33601683378219604, "eval_runtime": 432.4301, "eval_samples_per_second": 1.783, "eval_steps_per_second": 0.113, "eval_wer": 0.7586807234742238, "step": 1500 }, { "epoch": 1.3815187557182067, "grad_norm": 4.068643569946289, "learning_rate": 2.7469804893155775e-05, "loss": 0.2527, "step": 1510 }, { "epoch": 1.3906678865507778, "grad_norm": 5.818108081817627, "learning_rate": 2.7314958191390527e-05, "loss": 0.2707, "step": 1520 }, { "epoch": 1.3998170173833486, "grad_norm": 6.448596477508545, "learning_rate": 2.7160111489625272e-05, "loss": 0.2466, "step": 1530 }, { "epoch": 1.4089661482159195, "grad_norm": 6.120127201080322, "learning_rate": 2.7005264787860017e-05, "loss": 0.259, "step": 1540 }, { "epoch": 1.4181152790484903, "grad_norm": 4.396270751953125, "learning_rate": 2.685041808609477e-05, "loss": 0.2505, "step": 1550 }, { "epoch": 1.4272644098810612, "grad_norm": 3.8976686000823975, "learning_rate": 2.6695571384329514e-05, "loss": 0.2429, "step": 1560 }, { "epoch": 1.4364135407136323, "grad_norm": 4.241589069366455, "learning_rate": 2.6540724682564262e-05, "loss": 0.2424, "step": 1570 }, { "epoch": 1.445562671546203, "grad_norm": 6.113090515136719, "learning_rate": 2.638587798079901e-05, "loss": 0.2642, "step": 1580 }, { "epoch": 1.454711802378774, "grad_norm": 4.122611999511719, "learning_rate": 2.623103127903376e-05, "loss": 0.2259, "step": 1590 }, { "epoch": 1.463860933211345, "grad_norm": 4.869472026824951, "learning_rate": 2.6076184577268504e-05, "loss": 0.2542, "step": 1600 }, { "epoch": 1.4730100640439159, "grad_norm": 4.926369667053223, "learning_rate": 2.5921337875503256e-05, "loss": 0.2789, "step": 1610 }, { "epoch": 1.4821591948764867, "grad_norm": 5.319028854370117, "learning_rate": 2.5766491173738e-05, "loss": 0.265, "step": 1620 }, { "epoch": 1.4913083257090576, "grad_norm": 6.620922088623047, "learning_rate": 2.5611644471972746e-05, "loss": 0.2442, "step": 1630 }, { "epoch": 1.5004574565416284, "grad_norm": 5.458837032318115, "learning_rate": 2.5456797770207498e-05, "loss": 0.252, "step": 1640 }, { "epoch": 1.5096065873741995, "grad_norm": 5.415153503417969, "learning_rate": 2.5301951068442243e-05, "loss": 0.237, "step": 1650 }, { "epoch": 1.5187557182067704, "grad_norm": 4.952278137207031, "learning_rate": 2.5147104366676995e-05, "loss": 0.2577, "step": 1660 }, { "epoch": 1.5279048490393414, "grad_norm": 4.834970951080322, "learning_rate": 2.499225766491174e-05, "loss": 0.2452, "step": 1670 }, { "epoch": 1.5370539798719123, "grad_norm": 5.410050392150879, "learning_rate": 2.4837410963146488e-05, "loss": 0.2258, "step": 1680 }, { "epoch": 1.5462031107044831, "grad_norm": 4.104517936706543, "learning_rate": 2.4682564261381233e-05, "loss": 0.229, "step": 1690 }, { "epoch": 1.555352241537054, "grad_norm": 4.475819110870361, "learning_rate": 2.452771755961598e-05, "loss": 0.2589, "step": 1700 }, { "epoch": 1.5645013723696248, "grad_norm": 3.8395609855651855, "learning_rate": 2.437287085785073e-05, "loss": 0.2269, "step": 1710 }, { "epoch": 1.5736505032021957, "grad_norm": 4.9355621337890625, "learning_rate": 2.4218024156085475e-05, "loss": 0.2625, "step": 1720 }, { "epoch": 1.5827996340347665, "grad_norm": 4.053934097290039, "learning_rate": 2.4063177454320223e-05, "loss": 0.2559, "step": 1730 }, { "epoch": 1.5919487648673376, "grad_norm": 5.001983642578125, "learning_rate": 2.3908330752554972e-05, "loss": 0.23, "step": 1740 }, { "epoch": 1.6010978956999085, "grad_norm": 5.705740928649902, "learning_rate": 2.375348405078972e-05, "loss": 0.2173, "step": 1750 }, { "epoch": 1.6102470265324795, "grad_norm": 4.854909420013428, "learning_rate": 2.3598637349024465e-05, "loss": 0.2297, "step": 1760 }, { "epoch": 1.6193961573650504, "grad_norm": 3.785277843475342, "learning_rate": 2.3443790647259214e-05, "loss": 0.2065, "step": 1770 }, { "epoch": 1.6285452881976212, "grad_norm": 5.307765960693359, "learning_rate": 2.3288943945493962e-05, "loss": 0.2246, "step": 1780 }, { "epoch": 1.637694419030192, "grad_norm": 5.032717704772949, "learning_rate": 2.3134097243728707e-05, "loss": 0.2168, "step": 1790 }, { "epoch": 1.646843549862763, "grad_norm": 4.665537357330322, "learning_rate": 2.2979250541963456e-05, "loss": 0.2409, "step": 1800 }, { "epoch": 1.6559926806953338, "grad_norm": 4.126980304718018, "learning_rate": 2.2824403840198204e-05, "loss": 0.2397, "step": 1810 }, { "epoch": 1.6651418115279049, "grad_norm": 5.973440170288086, "learning_rate": 2.2669557138432952e-05, "loss": 0.2654, "step": 1820 }, { "epoch": 1.6742909423604757, "grad_norm": 4.972531795501709, "learning_rate": 2.25147104366677e-05, "loss": 0.2636, "step": 1830 }, { "epoch": 1.6834400731930468, "grad_norm": 6.962503910064697, "learning_rate": 2.235986373490245e-05, "loss": 0.2629, "step": 1840 }, { "epoch": 1.6925892040256176, "grad_norm": 4.002923488616943, "learning_rate": 2.2205017033137198e-05, "loss": 0.2333, "step": 1850 }, { "epoch": 1.7017383348581885, "grad_norm": 5.305150985717773, "learning_rate": 2.2050170331371943e-05, "loss": 0.2535, "step": 1860 }, { "epoch": 1.7108874656907593, "grad_norm": 4.577486038208008, "learning_rate": 2.189532362960669e-05, "loss": 0.2307, "step": 1870 }, { "epoch": 1.7200365965233302, "grad_norm": 4.220026016235352, "learning_rate": 2.174047692784144e-05, "loss": 0.2461, "step": 1880 }, { "epoch": 1.729185727355901, "grad_norm": 5.4357428550720215, "learning_rate": 2.1585630226076188e-05, "loss": 0.2297, "step": 1890 }, { "epoch": 1.738334858188472, "grad_norm": 5.218511581420898, "learning_rate": 2.1430783524310933e-05, "loss": 0.2419, "step": 1900 }, { "epoch": 1.747483989021043, "grad_norm": 6.166689395904541, "learning_rate": 2.127593682254568e-05, "loss": 0.2471, "step": 1910 }, { "epoch": 1.756633119853614, "grad_norm": 5.226531982421875, "learning_rate": 2.112109012078043e-05, "loss": 0.238, "step": 1920 }, { "epoch": 1.7657822506861849, "grad_norm": 6.10182523727417, "learning_rate": 2.0966243419015175e-05, "loss": 0.2654, "step": 1930 }, { "epoch": 1.7749313815187557, "grad_norm": 4.4128737449646, "learning_rate": 2.0811396717249923e-05, "loss": 0.23, "step": 1940 }, { "epoch": 1.7840805123513266, "grad_norm": 4.541961193084717, "learning_rate": 2.065655001548467e-05, "loss": 0.2067, "step": 1950 }, { "epoch": 1.7932296431838974, "grad_norm": 8.150908470153809, "learning_rate": 2.050170331371942e-05, "loss": 0.224, "step": 1960 }, { "epoch": 1.8023787740164683, "grad_norm": 4.411103248596191, "learning_rate": 2.0346856611954165e-05, "loss": 0.2244, "step": 1970 }, { "epoch": 1.8115279048490394, "grad_norm": 4.345833778381348, "learning_rate": 2.0192009910188913e-05, "loss": 0.225, "step": 1980 }, { "epoch": 1.8206770356816102, "grad_norm": 4.550020694732666, "learning_rate": 2.0037163208423662e-05, "loss": 0.2406, "step": 1990 }, { "epoch": 1.8298261665141813, "grad_norm": 3.8560264110565186, "learning_rate": 1.988231650665841e-05, "loss": 0.2461, "step": 2000 }, { "epoch": 1.8298261665141813, "eval_loss": 0.29912057518959045, "eval_runtime": 422.9004, "eval_samples_per_second": 1.823, "eval_steps_per_second": 0.116, "eval_wer": 0.7281651997291808, "step": 2000 }, { "epoch": 1.8389752973467521, "grad_norm": 5.020371437072754, "learning_rate": 1.9727469804893155e-05, "loss": 0.2257, "step": 2010 }, { "epoch": 1.848124428179323, "grad_norm": 6.07639741897583, "learning_rate": 1.9572623103127904e-05, "loss": 0.2146, "step": 2020 }, { "epoch": 1.8572735590118938, "grad_norm": 5.103982925415039, "learning_rate": 1.9417776401362652e-05, "loss": 0.2248, "step": 2030 }, { "epoch": 1.8664226898444647, "grad_norm": 5.3223042488098145, "learning_rate": 1.9262929699597397e-05, "loss": 0.2162, "step": 2040 }, { "epoch": 1.8755718206770355, "grad_norm": 4.5631103515625, "learning_rate": 1.9108082997832146e-05, "loss": 0.2279, "step": 2050 }, { "epoch": 1.8847209515096066, "grad_norm": 4.72071647644043, "learning_rate": 1.8953236296066894e-05, "loss": 0.251, "step": 2060 }, { "epoch": 1.8938700823421775, "grad_norm": 4.969239234924316, "learning_rate": 1.8798389594301642e-05, "loss": 0.2172, "step": 2070 }, { "epoch": 1.9030192131747485, "grad_norm": 4.407639026641846, "learning_rate": 1.864354289253639e-05, "loss": 0.2098, "step": 2080 }, { "epoch": 1.9121683440073194, "grad_norm": 3.3802950382232666, "learning_rate": 1.8488696190771136e-05, "loss": 0.2192, "step": 2090 }, { "epoch": 1.9213174748398902, "grad_norm": 4.947459697723389, "learning_rate": 1.8333849489005884e-05, "loss": 0.2278, "step": 2100 }, { "epoch": 1.930466605672461, "grad_norm": 4.750110626220703, "learning_rate": 1.8179002787240633e-05, "loss": 0.2185, "step": 2110 }, { "epoch": 1.939615736505032, "grad_norm": 4.515120506286621, "learning_rate": 1.802415608547538e-05, "loss": 0.2045, "step": 2120 }, { "epoch": 1.9487648673376028, "grad_norm": 4.280106067657471, "learning_rate": 1.786930938371013e-05, "loss": 0.2051, "step": 2130 }, { "epoch": 1.9579139981701739, "grad_norm": 4.002866268157959, "learning_rate": 1.7714462681944878e-05, "loss": 0.2301, "step": 2140 }, { "epoch": 1.9670631290027447, "grad_norm": 4.178459644317627, "learning_rate": 1.7559615980179623e-05, "loss": 0.2202, "step": 2150 }, { "epoch": 1.9762122598353158, "grad_norm": 6.406257629394531, "learning_rate": 1.740476927841437e-05, "loss": 0.225, "step": 2160 }, { "epoch": 1.9853613906678866, "grad_norm": 4.606039524078369, "learning_rate": 1.724992257664912e-05, "loss": 0.2446, "step": 2170 }, { "epoch": 1.9945105215004575, "grad_norm": 4.238482475280762, "learning_rate": 1.7095075874883865e-05, "loss": 0.2524, "step": 2180 }, { "epoch": 2.0036596523330283, "grad_norm": 3.64787220954895, "learning_rate": 1.6940229173118613e-05, "loss": 0.218, "step": 2190 }, { "epoch": 2.012808783165599, "grad_norm": 3.7717037200927734, "learning_rate": 1.6785382471353362e-05, "loss": 0.1669, "step": 2200 }, { "epoch": 2.02195791399817, "grad_norm": 2.716965675354004, "learning_rate": 1.663053576958811e-05, "loss": 0.1806, "step": 2210 }, { "epoch": 2.031107044830741, "grad_norm": 4.059733867645264, "learning_rate": 1.6475689067822855e-05, "loss": 0.186, "step": 2220 }, { "epoch": 2.040256175663312, "grad_norm": 4.125363349914551, "learning_rate": 1.6320842366057604e-05, "loss": 0.1872, "step": 2230 }, { "epoch": 2.049405306495883, "grad_norm": 3.4910032749176025, "learning_rate": 1.6165995664292352e-05, "loss": 0.1519, "step": 2240 }, { "epoch": 2.058554437328454, "grad_norm": 2.8993113040924072, "learning_rate": 1.6011148962527097e-05, "loss": 0.1744, "step": 2250 }, { "epoch": 2.0677035681610247, "grad_norm": 4.730359077453613, "learning_rate": 1.5856302260761845e-05, "loss": 0.1627, "step": 2260 }, { "epoch": 2.0768526989935956, "grad_norm": 5.577477931976318, "learning_rate": 1.5701455558996594e-05, "loss": 0.1753, "step": 2270 }, { "epoch": 2.0860018298261664, "grad_norm": 4.823721885681152, "learning_rate": 1.5546608857231342e-05, "loss": 0.1821, "step": 2280 }, { "epoch": 2.0951509606587373, "grad_norm": 3.287593364715576, "learning_rate": 1.5391762155466087e-05, "loss": 0.1573, "step": 2290 }, { "epoch": 2.104300091491308, "grad_norm": 5.850045204162598, "learning_rate": 1.5236915453700837e-05, "loss": 0.1786, "step": 2300 }, { "epoch": 2.1134492223238794, "grad_norm": 4.332837104797363, "learning_rate": 1.5082068751935586e-05, "loss": 0.1884, "step": 2310 }, { "epoch": 2.1225983531564503, "grad_norm": 3.266853094100952, "learning_rate": 1.4927222050170331e-05, "loss": 0.1589, "step": 2320 }, { "epoch": 2.131747483989021, "grad_norm": 3.742208242416382, "learning_rate": 1.477237534840508e-05, "loss": 0.1691, "step": 2330 }, { "epoch": 2.140896614821592, "grad_norm": 4.098796844482422, "learning_rate": 1.4617528646639828e-05, "loss": 0.1622, "step": 2340 }, { "epoch": 2.150045745654163, "grad_norm": 5.091181755065918, "learning_rate": 1.4462681944874576e-05, "loss": 0.185, "step": 2350 }, { "epoch": 2.1591948764867337, "grad_norm": 3.2334043979644775, "learning_rate": 1.4307835243109321e-05, "loss": 0.1573, "step": 2360 }, { "epoch": 2.1683440073193045, "grad_norm": 3.941044569015503, "learning_rate": 1.415298854134407e-05, "loss": 0.17, "step": 2370 }, { "epoch": 2.1774931381518754, "grad_norm": 4.680139541625977, "learning_rate": 1.3998141839578818e-05, "loss": 0.1652, "step": 2380 }, { "epoch": 2.1866422689844462, "grad_norm": 3.671124219894409, "learning_rate": 1.3843295137813565e-05, "loss": 0.1637, "step": 2390 }, { "epoch": 2.1957913998170175, "grad_norm": 3.4199767112731934, "learning_rate": 1.3688448436048313e-05, "loss": 0.1822, "step": 2400 }, { "epoch": 2.2049405306495884, "grad_norm": 4.193777084350586, "learning_rate": 1.353360173428306e-05, "loss": 0.1724, "step": 2410 }, { "epoch": 2.2140896614821592, "grad_norm": 3.5047738552093506, "learning_rate": 1.3378755032517808e-05, "loss": 0.1661, "step": 2420 }, { "epoch": 2.22323879231473, "grad_norm": 4.056273460388184, "learning_rate": 1.3223908330752555e-05, "loss": 0.1476, "step": 2430 }, { "epoch": 2.232387923147301, "grad_norm": 5.089756488800049, "learning_rate": 1.3069061628987303e-05, "loss": 0.1785, "step": 2440 }, { "epoch": 2.241537053979872, "grad_norm": 3.5870766639709473, "learning_rate": 1.2914214927222052e-05, "loss": 0.1835, "step": 2450 }, { "epoch": 2.2506861848124426, "grad_norm": 3.9031713008880615, "learning_rate": 1.2759368225456797e-05, "loss": 0.1725, "step": 2460 }, { "epoch": 2.259835315645014, "grad_norm": 4.2854437828063965, "learning_rate": 1.2604521523691545e-05, "loss": 0.1774, "step": 2470 }, { "epoch": 2.268984446477585, "grad_norm": 4.6277756690979, "learning_rate": 1.2449674821926294e-05, "loss": 0.1583, "step": 2480 }, { "epoch": 2.2781335773101556, "grad_norm": 5.180362224578857, "learning_rate": 1.229482812016104e-05, "loss": 0.1636, "step": 2490 }, { "epoch": 2.2872827081427265, "grad_norm": 2.9935238361358643, "learning_rate": 1.2139981418395789e-05, "loss": 0.1486, "step": 2500 }, { "epoch": 2.2872827081427265, "eval_loss": 0.2814071476459503, "eval_runtime": 424.1932, "eval_samples_per_second": 1.818, "eval_steps_per_second": 0.116, "eval_wer": 0.7055808105232615, "step": 2500 }, { "epoch": 2.2964318389752973, "grad_norm": 3.3283779621124268, "learning_rate": 1.1985134716630536e-05, "loss": 0.158, "step": 2510 }, { "epoch": 2.305580969807868, "grad_norm": 4.186689376831055, "learning_rate": 1.1830288014865284e-05, "loss": 0.1598, "step": 2520 }, { "epoch": 2.314730100640439, "grad_norm": 3.5572612285614014, "learning_rate": 1.167544131310003e-05, "loss": 0.1492, "step": 2530 }, { "epoch": 2.32387923147301, "grad_norm": 2.8076884746551514, "learning_rate": 1.1520594611334779e-05, "loss": 0.1637, "step": 2540 }, { "epoch": 2.3330283623055807, "grad_norm": 4.602914810180664, "learning_rate": 1.1365747909569528e-05, "loss": 0.1526, "step": 2550 }, { "epoch": 2.342177493138152, "grad_norm": 2.5850772857666016, "learning_rate": 1.1210901207804274e-05, "loss": 0.159, "step": 2560 }, { "epoch": 2.351326623970723, "grad_norm": 5.045381546020508, "learning_rate": 1.1056054506039023e-05, "loss": 0.1503, "step": 2570 }, { "epoch": 2.3604757548032937, "grad_norm": 4.628170967102051, "learning_rate": 1.090120780427377e-05, "loss": 0.159, "step": 2580 }, { "epoch": 2.3696248856358646, "grad_norm": 3.4683902263641357, "learning_rate": 1.0746361102508518e-05, "loss": 0.1613, "step": 2590 }, { "epoch": 2.3787740164684354, "grad_norm": 4.1546525955200195, "learning_rate": 1.0591514400743265e-05, "loss": 0.1482, "step": 2600 }, { "epoch": 2.3879231473010063, "grad_norm": 5.595340251922607, "learning_rate": 1.0436667698978013e-05, "loss": 0.1654, "step": 2610 }, { "epoch": 2.397072278133577, "grad_norm": 4.809768199920654, "learning_rate": 1.028182099721276e-05, "loss": 0.1457, "step": 2620 }, { "epoch": 2.4062214089661484, "grad_norm": 3.541982889175415, "learning_rate": 1.0126974295447506e-05, "loss": 0.163, "step": 2630 }, { "epoch": 2.4153705397987193, "grad_norm": 5.883151054382324, "learning_rate": 9.972127593682255e-06, "loss": 0.1761, "step": 2640 }, { "epoch": 2.42451967063129, "grad_norm": 4.718671798706055, "learning_rate": 9.817280891917002e-06, "loss": 0.1562, "step": 2650 }, { "epoch": 2.433668801463861, "grad_norm": 3.135131597518921, "learning_rate": 9.66243419015175e-06, "loss": 0.1669, "step": 2660 }, { "epoch": 2.442817932296432, "grad_norm": 5.202821254730225, "learning_rate": 9.507587488386498e-06, "loss": 0.1748, "step": 2670 }, { "epoch": 2.4519670631290027, "grad_norm": 5.344453811645508, "learning_rate": 9.352740786621247e-06, "loss": 0.1641, "step": 2680 }, { "epoch": 2.4611161939615736, "grad_norm": 3.3761284351348877, "learning_rate": 9.197894084855993e-06, "loss": 0.1574, "step": 2690 }, { "epoch": 2.4702653247941444, "grad_norm": 5.866576671600342, "learning_rate": 9.04304738309074e-06, "loss": 0.1621, "step": 2700 }, { "epoch": 2.4794144556267153, "grad_norm": 4.291085720062256, "learning_rate": 8.888200681325489e-06, "loss": 0.1614, "step": 2710 }, { "epoch": 2.4885635864592865, "grad_norm": 2.837286949157715, "learning_rate": 8.733353979560235e-06, "loss": 0.1654, "step": 2720 }, { "epoch": 2.4977127172918574, "grad_norm": 3.848227024078369, "learning_rate": 8.578507277794984e-06, "loss": 0.1578, "step": 2730 }, { "epoch": 2.5068618481244282, "grad_norm": 3.820240020751953, "learning_rate": 8.42366057602973e-06, "loss": 0.1627, "step": 2740 }, { "epoch": 2.516010978956999, "grad_norm": 3.1845788955688477, "learning_rate": 8.268813874264479e-06, "loss": 0.1718, "step": 2750 }, { "epoch": 2.52516010978957, "grad_norm": 4.4272236824035645, "learning_rate": 8.113967172499226e-06, "loss": 0.1624, "step": 2760 }, { "epoch": 2.534309240622141, "grad_norm": 3.211336374282837, "learning_rate": 7.959120470733972e-06, "loss": 0.1571, "step": 2770 }, { "epoch": 2.5434583714547117, "grad_norm": 3.920867443084717, "learning_rate": 7.80427376896872e-06, "loss": 0.1499, "step": 2780 }, { "epoch": 2.552607502287283, "grad_norm": 5.123950481414795, "learning_rate": 7.64942706720347e-06, "loss": 0.1475, "step": 2790 }, { "epoch": 2.5617566331198534, "grad_norm": 3.7110486030578613, "learning_rate": 7.494580365438217e-06, "loss": 0.1552, "step": 2800 }, { "epoch": 2.5709057639524246, "grad_norm": 4.068341255187988, "learning_rate": 7.3397336636729635e-06, "loss": 0.1494, "step": 2810 }, { "epoch": 2.5800548947849955, "grad_norm": 4.653831958770752, "learning_rate": 7.184886961907712e-06, "loss": 0.161, "step": 2820 }, { "epoch": 2.5892040256175664, "grad_norm": 3.5324552059173584, "learning_rate": 7.0300402601424595e-06, "loss": 0.16, "step": 2830 }, { "epoch": 2.598353156450137, "grad_norm": 5.100922107696533, "learning_rate": 6.875193558377208e-06, "loss": 0.1549, "step": 2840 }, { "epoch": 2.607502287282708, "grad_norm": 3.772149085998535, "learning_rate": 6.720346856611955e-06, "loss": 0.1613, "step": 2850 }, { "epoch": 2.616651418115279, "grad_norm": 4.288483619689941, "learning_rate": 6.565500154846701e-06, "loss": 0.1605, "step": 2860 }, { "epoch": 2.6258005489478498, "grad_norm": 3.9227993488311768, "learning_rate": 6.41065345308145e-06, "loss": 0.1538, "step": 2870 }, { "epoch": 2.634949679780421, "grad_norm": 3.3688392639160156, "learning_rate": 6.255806751316197e-06, "loss": 0.173, "step": 2880 }, { "epoch": 2.644098810612992, "grad_norm": 3.6099278926849365, "learning_rate": 6.100960049550945e-06, "loss": 0.1739, "step": 2890 }, { "epoch": 2.6532479414455628, "grad_norm": 3.802189826965332, "learning_rate": 5.9461133477856925e-06, "loss": 0.1506, "step": 2900 }, { "epoch": 2.6623970722781336, "grad_norm": 3.382754325866699, "learning_rate": 5.79126664602044e-06, "loss": 0.1701, "step": 2910 }, { "epoch": 2.6715462031107045, "grad_norm": 3.056814193725586, "learning_rate": 5.636419944255188e-06, "loss": 0.1638, "step": 2920 }, { "epoch": 2.6806953339432753, "grad_norm": 3.345564842224121, "learning_rate": 5.481573242489935e-06, "loss": 0.1318, "step": 2930 }, { "epoch": 2.689844464775846, "grad_norm": 3.740990400314331, "learning_rate": 5.326726540724683e-06, "loss": 0.1611, "step": 2940 }, { "epoch": 2.6989935956084175, "grad_norm": 2.8473143577575684, "learning_rate": 5.17187983895943e-06, "loss": 0.1684, "step": 2950 }, { "epoch": 2.708142726440988, "grad_norm": 2.8555662631988525, "learning_rate": 5.017033137194178e-06, "loss": 0.1883, "step": 2960 }, { "epoch": 2.717291857273559, "grad_norm": 4.181397438049316, "learning_rate": 4.8621864354289254e-06, "loss": 0.1677, "step": 2970 }, { "epoch": 2.72644098810613, "grad_norm": 4.9955949783325195, "learning_rate": 4.707339733663673e-06, "loss": 0.1724, "step": 2980 }, { "epoch": 2.735590118938701, "grad_norm": 3.999300956726074, "learning_rate": 4.552493031898421e-06, "loss": 0.1433, "step": 2990 }, { "epoch": 2.7447392497712717, "grad_norm": 3.054906129837036, "learning_rate": 4.397646330133168e-06, "loss": 0.1796, "step": 3000 }, { "epoch": 2.7447392497712717, "eval_loss": 0.2692735195159912, "eval_runtime": 414.8696, "eval_samples_per_second": 1.858, "eval_steps_per_second": 0.118, "eval_wer": 0.6916529645033369, "step": 3000 } ], "logging_steps": 10, "max_steps": 3279, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.18116487028736e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }