WhisperLevantine / trainer_state.json
{
"best_metric": 13.76934528961673,
"best_model_checkpoint": "/speechbrain/data/whis/whisper-medium-ar-aug30-cont3/checkpoint-3600",
"epoch": 0.7889546351084813,
"eval_steps": 300,
"global_step": 3600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005478851632697786,
"grad_norm": 0.5829852819442749,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.1288,
"step": 25
},
{
"epoch": 0.010957703265395573,
"grad_norm": 0.5360992550849915,
"learning_rate": 5.000000000000001e-07,
"loss": 0.13,
"step": 50
},
{
"epoch": 0.01643655489809336,
"grad_norm": 0.5283806324005127,
"learning_rate": 7.5e-07,
"loss": 0.1272,
"step": 75
},
{
"epoch": 0.021915406530791146,
"grad_norm": 0.5019442439079285,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1282,
"step": 100
},
{
"epoch": 0.027394258163488932,
"grad_norm": 0.4962950050830841,
"learning_rate": 1.25e-06,
"loss": 0.1225,
"step": 125
},
{
"epoch": 0.03287310979618672,
"grad_norm": 0.4953068792819977,
"learning_rate": 1.5e-06,
"loss": 0.1201,
"step": 150
},
{
"epoch": 0.03835196142888451,
"grad_norm": 0.5007308125495911,
"learning_rate": 1.75e-06,
"loss": 0.1216,
"step": 175
},
{
"epoch": 0.04383081306158229,
"grad_norm": 0.5254662036895752,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.1179,
"step": 200
},
{
"epoch": 0.04930966469428008,
"grad_norm": 0.45517662167549133,
"learning_rate": 2.25e-06,
"loss": 0.1171,
"step": 225
},
{
"epoch": 0.054788516326977864,
"grad_norm": 0.4700426459312439,
"learning_rate": 2.5e-06,
"loss": 0.1135,
"step": 250
},
{
"epoch": 0.060267367959675654,
"grad_norm": 0.48102590441703796,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.1108,
"step": 275
},
{
"epoch": 0.06574621959237344,
"grad_norm": 0.46885138750076294,
"learning_rate": 3e-06,
"loss": 0.111,
"step": 300
},
{
"epoch": 0.06574621959237344,
"eval_loss": 0.10784981399774551,
"eval_runtime": 1329.228,
"eval_samples_per_second": 2.505,
"eval_steps_per_second": 0.053,
"eval_wer": 13.824164156539604,
"step": 300
},
{
"epoch": 0.07122507122507123,
"grad_norm": 0.5266237854957581,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.1135,
"step": 325
},
{
"epoch": 0.07670392285776902,
"grad_norm": 0.5073336958885193,
"learning_rate": 3.5e-06,
"loss": 0.1102,
"step": 350
},
{
"epoch": 0.08218277449046679,
"grad_norm": 0.4480571150779724,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.1083,
"step": 375
},
{
"epoch": 0.08766162612316458,
"grad_norm": 0.5027902126312256,
"learning_rate": 4.000000000000001e-06,
"loss": 0.1056,
"step": 400
},
{
"epoch": 0.09314047775586237,
"grad_norm": 0.5120052695274353,
"learning_rate": 4.25e-06,
"loss": 0.1039,
"step": 425
},
{
"epoch": 0.09861932938856016,
"grad_norm": 0.48518097400665283,
"learning_rate": 4.5e-06,
"loss": 0.1043,
"step": 450
},
{
"epoch": 0.10409818102125794,
"grad_norm": 0.5022168159484863,
"learning_rate": 4.75e-06,
"loss": 0.1037,
"step": 475
},
{
"epoch": 0.10957703265395573,
"grad_norm": 0.4528847932815552,
"learning_rate": 5e-06,
"loss": 0.0985,
"step": 500
},
{
"epoch": 0.11505588428665352,
"grad_norm": 0.49148285388946533,
"learning_rate": 4.9986227412957256e-06,
"loss": 0.1004,
"step": 525
},
{
"epoch": 0.12053473591935131,
"grad_norm": 0.4991222321987152,
"learning_rate": 4.99724548259145e-06,
"loss": 0.1,
"step": 550
},
{
"epoch": 0.12601358755204908,
"grad_norm": 0.4580934941768646,
"learning_rate": 4.995868223887175e-06,
"loss": 0.0961,
"step": 575
},
{
"epoch": 0.13149243918474687,
"grad_norm": 0.4897126853466034,
"learning_rate": 4.9944909651829e-06,
"loss": 0.1,
"step": 600
},
{
"epoch": 0.13149243918474687,
"eval_loss": 0.1092229038476944,
"eval_runtime": 1332.663,
"eval_samples_per_second": 2.499,
"eval_steps_per_second": 0.053,
"eval_wer": 13.810892220337223,
"step": 600
},
{
"epoch": 0.13697129081744466,
"grad_norm": 0.47739720344543457,
"learning_rate": 4.993113706478625e-06,
"loss": 0.1049,
"step": 625
},
{
"epoch": 0.14245014245014245,
"grad_norm": 0.5223620533943176,
"learning_rate": 4.9917364477743505e-06,
"loss": 0.1001,
"step": 650
},
{
"epoch": 0.14792899408284024,
"grad_norm": 0.4523641765117645,
"learning_rate": 4.990359189070076e-06,
"loss": 0.0986,
"step": 675
},
{
"epoch": 0.15340784571553803,
"grad_norm": 0.47216111421585083,
"learning_rate": 4.988981930365801e-06,
"loss": 0.1015,
"step": 700
},
{
"epoch": 0.15888669734823582,
"grad_norm": 0.5023489594459534,
"learning_rate": 4.987604671661525e-06,
"loss": 0.0987,
"step": 725
},
{
"epoch": 0.16436554898093358,
"grad_norm": 0.5276343822479248,
"learning_rate": 4.98622741295725e-06,
"loss": 0.1003,
"step": 750
},
{
"epoch": 0.16984440061363137,
"grad_norm": 0.49953222274780273,
"learning_rate": 4.9848501542529754e-06,
"loss": 0.0983,
"step": 775
},
{
"epoch": 0.17532325224632916,
"grad_norm": 0.45781460404396057,
"learning_rate": 4.983472895548701e-06,
"loss": 0.0978,
"step": 800
},
{
"epoch": 0.18080210387902695,
"grad_norm": 0.4533933997154236,
"learning_rate": 4.982095636844425e-06,
"loss": 0.0984,
"step": 825
},
{
"epoch": 0.18628095551172474,
"grad_norm": 0.5115811824798584,
"learning_rate": 4.98071837814015e-06,
"loss": 0.097,
"step": 850
},
{
"epoch": 0.19175980714442253,
"grad_norm": 0.5033650994300842,
"learning_rate": 4.979341119435875e-06,
"loss": 0.0985,
"step": 875
},
{
"epoch": 0.19723865877712032,
"grad_norm": 0.4879961609840393,
"learning_rate": 4.9779638607316e-06,
"loss": 0.0968,
"step": 900
},
{
"epoch": 0.19723865877712032,
"eval_loss": 0.10996146500110626,
"eval_runtime": 1382.8033,
"eval_samples_per_second": 2.408,
"eval_steps_per_second": 0.051,
"eval_wer": 13.83570497062863,
"step": 900
},
{
"epoch": 0.2027175104098181,
"grad_norm": 0.4946504831314087,
"learning_rate": 4.9765866020273255e-06,
"loss": 0.0975,
"step": 925
},
{
"epoch": 0.20819636204251588,
"grad_norm": 0.5116554498672485,
"learning_rate": 4.975209343323051e-06,
"loss": 0.0973,
"step": 950
},
{
"epoch": 0.21367521367521367,
"grad_norm": 0.5216028094291687,
"learning_rate": 4.973832084618776e-06,
"loss": 0.097,
"step": 975
},
{
"epoch": 0.21915406530791146,
"grad_norm": 0.5032294392585754,
"learning_rate": 4.9724548259145e-06,
"loss": 0.0909,
"step": 1000
},
{
"epoch": 0.22463291694060925,
"grad_norm": 0.526467502117157,
"learning_rate": 4.971077567210225e-06,
"loss": 0.0969,
"step": 1025
},
{
"epoch": 0.23011176857330704,
"grad_norm": 0.488610178232193,
"learning_rate": 4.9697003085059505e-06,
"loss": 0.098,
"step": 1050
},
{
"epoch": 0.23559062020600482,
"grad_norm": 0.47755196690559387,
"learning_rate": 4.968323049801675e-06,
"loss": 0.0931,
"step": 1075
},
{
"epoch": 0.24106947183870261,
"grad_norm": 0.5348175168037415,
"learning_rate": 4.9669457910974e-06,
"loss": 0.0952,
"step": 1100
},
{
"epoch": 0.2465483234714004,
"grad_norm": 0.48804572224617004,
"learning_rate": 4.965568532393125e-06,
"loss": 0.0954,
"step": 1125
},
{
"epoch": 0.25202717510409817,
"grad_norm": 0.48517024517059326,
"learning_rate": 4.96419127368885e-06,
"loss": 0.0958,
"step": 1150
},
{
"epoch": 0.25750602673679596,
"grad_norm": 0.5918833017349243,
"learning_rate": 4.9628140149845745e-06,
"loss": 0.0958,
"step": 1175
},
{
"epoch": 0.26298487836949375,
"grad_norm": 0.5274895429611206,
"learning_rate": 4.9614367562803e-06,
"loss": 0.0967,
"step": 1200
},
{
"epoch": 0.26298487836949375,
"eval_loss": 0.11020273715257645,
"eval_runtime": 1336.5505,
"eval_samples_per_second": 2.491,
"eval_steps_per_second": 0.052,
"eval_wer": 14.059019723251279,
"step": 1200
},
{
"epoch": 0.26846373000219154,
"grad_norm": 0.4892116189002991,
"learning_rate": 4.960059497576025e-06,
"loss": 0.0929,
"step": 1225
},
{
"epoch": 0.2739425816348893,
"grad_norm": 0.4998278319835663,
"learning_rate": 4.95868223887175e-06,
"loss": 0.0948,
"step": 1250
},
{
"epoch": 0.2794214332675871,
"grad_norm": 0.5273219347000122,
"learning_rate": 4.957304980167475e-06,
"loss": 0.0907,
"step": 1275
},
{
"epoch": 0.2849002849002849,
"grad_norm": 0.47056299448013306,
"learning_rate": 4.9559277214632e-06,
"loss": 0.091,
"step": 1300
},
{
"epoch": 0.2903791365329827,
"grad_norm": 0.4882357716560364,
"learning_rate": 4.9545504627589255e-06,
"loss": 0.0941,
"step": 1325
},
{
"epoch": 0.2958579881656805,
"grad_norm": 0.5165619850158691,
"learning_rate": 4.95317320405465e-06,
"loss": 0.0921,
"step": 1350
},
{
"epoch": 0.3013368397983783,
"grad_norm": 0.4642132520675659,
"learning_rate": 4.951795945350375e-06,
"loss": 0.0914,
"step": 1375
},
{
"epoch": 0.30681569143107607,
"grad_norm": 0.5326189398765564,
"learning_rate": 4.9504186866461e-06,
"loss": 0.0959,
"step": 1400
},
{
"epoch": 0.31229454306377386,
"grad_norm": 0.44957414269447327,
"learning_rate": 4.949041427941824e-06,
"loss": 0.0871,
"step": 1425
},
{
"epoch": 0.31777339469647164,
"grad_norm": 0.4865795373916626,
"learning_rate": 4.9476641692375496e-06,
"loss": 0.0891,
"step": 1450
},
{
"epoch": 0.3232522463291694,
"grad_norm": 0.49055206775665283,
"learning_rate": 4.946286910533275e-06,
"loss": 0.0953,
"step": 1475
},
{
"epoch": 0.32873109796186717,
"grad_norm": 0.49437183141708374,
"learning_rate": 4.944909651829e-06,
"loss": 0.0896,
"step": 1500
},
{
"epoch": 0.32873109796186717,
"eval_loss": 0.11099947988986969,
"eval_runtime": 1334.6828,
"eval_samples_per_second": 2.495,
"eval_steps_per_second": 0.052,
"eval_wer": 13.836282011333079,
"step": 1500
},
{
"epoch": 0.33420994959456496,
"grad_norm": 0.5322751998901367,
"learning_rate": 4.943532393124725e-06,
"loss": 0.0929,
"step": 1525
},
{
"epoch": 0.33968880122726275,
"grad_norm": 0.5024107098579407,
"learning_rate": 4.94215513442045e-06,
"loss": 0.0922,
"step": 1550
},
{
"epoch": 0.34516765285996054,
"grad_norm": 0.4347039759159088,
"learning_rate": 4.940777875716175e-06,
"loss": 0.0908,
"step": 1575
},
{
"epoch": 0.35064650449265833,
"grad_norm": 0.5164802074432373,
"learning_rate": 4.9394006170119e-06,
"loss": 0.0939,
"step": 1600
},
{
"epoch": 0.3561253561253561,
"grad_norm": 0.4986899793148041,
"learning_rate": 4.938023358307625e-06,
"loss": 0.0883,
"step": 1625
},
{
"epoch": 0.3616042077580539,
"grad_norm": 0.5192301869392395,
"learning_rate": 4.93664609960335e-06,
"loss": 0.0915,
"step": 1650
},
{
"epoch": 0.3670830593907517,
"grad_norm": 0.5347697734832764,
"learning_rate": 4.935268840899075e-06,
"loss": 0.0884,
"step": 1675
},
{
"epoch": 0.3725619110234495,
"grad_norm": 0.47178414463996887,
"learning_rate": 4.9338915821947994e-06,
"loss": 0.0922,
"step": 1700
},
{
"epoch": 0.3780407626561473,
"grad_norm": 0.4868011772632599,
"learning_rate": 4.932514323490525e-06,
"loss": 0.0925,
"step": 1725
},
{
"epoch": 0.38351961428884507,
"grad_norm": 0.491805762052536,
"learning_rate": 4.93113706478625e-06,
"loss": 0.091,
"step": 1750
},
{
"epoch": 0.38899846592154286,
"grad_norm": 0.5111169219017029,
"learning_rate": 4.929759806081975e-06,
"loss": 0.0888,
"step": 1775
},
{
"epoch": 0.39447731755424065,
"grad_norm": 0.4957449436187744,
"learning_rate": 4.9283825473777e-06,
"loss": 0.0907,
"step": 1800
},
{
"epoch": 0.39447731755424065,
"eval_loss": 0.11147266626358032,
"eval_runtime": 1330.9042,
"eval_samples_per_second": 2.502,
"eval_steps_per_second": 0.053,
"eval_wer": 14.133457974125493,
"step": 1800
},
{
"epoch": 0.39995616918693844,
"grad_norm": 0.46782732009887695,
"learning_rate": 4.927005288673425e-06,
"loss": 0.0882,
"step": 1825
},
{
"epoch": 0.4054350208196362,
"grad_norm": 0.4959644079208374,
"learning_rate": 4.92562802996915e-06,
"loss": 0.0925,
"step": 1850
},
{
"epoch": 0.410913872452334,
"grad_norm": 0.4934210479259491,
"learning_rate": 4.924250771264875e-06,
"loss": 0.0901,
"step": 1875
},
{
"epoch": 0.41639272408503175,
"grad_norm": 0.520613968372345,
"learning_rate": 4.9228735125606e-06,
"loss": 0.0893,
"step": 1900
},
{
"epoch": 0.42187157571772954,
"grad_norm": 0.48207858204841614,
"learning_rate": 4.921496253856325e-06,
"loss": 0.0918,
"step": 1925
},
{
"epoch": 0.42735042735042733,
"grad_norm": 0.5212067365646362,
"learning_rate": 4.920118995152049e-06,
"loss": 0.0915,
"step": 1950
},
{
"epoch": 0.4328292789831251,
"grad_norm": 0.4570591449737549,
"learning_rate": 4.9187417364477744e-06,
"loss": 0.0879,
"step": 1975
},
{
"epoch": 0.4383081306158229,
"grad_norm": 0.5075387954711914,
"learning_rate": 4.9173644777435e-06,
"loss": 0.0921,
"step": 2000
},
{
"epoch": 0.4437869822485207,
"grad_norm": 0.4904765784740448,
"learning_rate": 4.915987219039225e-06,
"loss": 0.0892,
"step": 2025
},
{
"epoch": 0.4492658338812185,
"grad_norm": 0.4949191212654114,
"learning_rate": 4.91460996033495e-06,
"loss": 0.0909,
"step": 2050
},
{
"epoch": 0.4547446855139163,
"grad_norm": 0.5112493634223938,
"learning_rate": 4.913232701630675e-06,
"loss": 0.089,
"step": 2075
},
{
"epoch": 0.46022353714661407,
"grad_norm": 0.47857844829559326,
"learning_rate": 4.9118554429264e-06,
"loss": 0.0901,
"step": 2100
},
{
"epoch": 0.46022353714661407,
"eval_loss": 0.112032450735569,
"eval_runtime": 1387.0176,
"eval_samples_per_second": 2.401,
"eval_steps_per_second": 0.05,
"eval_wer": 13.988043716603768,
"step": 2100
},
{
"epoch": 0.46570238877931186,
"grad_norm": 0.4768081307411194,
"learning_rate": 4.9104781842221245e-06,
"loss": 0.0874,
"step": 2125
},
{
"epoch": 0.47118124041200965,
"grad_norm": 0.4740845859050751,
"learning_rate": 4.90910092551785e-06,
"loss": 0.0885,
"step": 2150
},
{
"epoch": 0.47666009204470744,
"grad_norm": 0.4519156813621521,
"learning_rate": 4.907723666813575e-06,
"loss": 0.0867,
"step": 2175
},
{
"epoch": 0.48213894367740523,
"grad_norm": 0.5068197250366211,
"learning_rate": 4.9063464081093e-06,
"loss": 0.0878,
"step": 2200
},
{
"epoch": 0.487617795310103,
"grad_norm": 0.49033084511756897,
"learning_rate": 4.904969149405024e-06,
"loss": 0.0862,
"step": 2225
},
{
"epoch": 0.4930966469428008,
"grad_norm": 0.4625925123691559,
"learning_rate": 4.9035918907007495e-06,
"loss": 0.0866,
"step": 2250
},
{
"epoch": 0.4985754985754986,
"grad_norm": 0.5056318640708923,
"learning_rate": 4.902214631996475e-06,
"loss": 0.086,
"step": 2275
},
{
"epoch": 0.5040543502081963,
"grad_norm": 0.46904438734054565,
"learning_rate": 4.9008373732922e-06,
"loss": 0.0836,
"step": 2300
},
{
"epoch": 0.5095332018408941,
"grad_norm": 0.5033324360847473,
"learning_rate": 4.899460114587924e-06,
"loss": 0.0879,
"step": 2325
},
{
"epoch": 0.5150120534735919,
"grad_norm": 0.5081333518028259,
"learning_rate": 4.898082855883649e-06,
"loss": 0.0867,
"step": 2350
},
{
"epoch": 0.5204909051062897,
"grad_norm": 0.44954633712768555,
"learning_rate": 4.896705597179374e-06,
"loss": 0.0859,
"step": 2375
},
{
"epoch": 0.5259697567389875,
"grad_norm": 0.5036991238594055,
"learning_rate": 4.8953283384750996e-06,
"loss": 0.0823,
"step": 2400
},
{
"epoch": 0.5259697567389875,
"eval_loss": 0.11307456344366074,
"eval_runtime": 1331.1273,
"eval_samples_per_second": 2.502,
"eval_steps_per_second": 0.053,
"eval_wer": 13.977079943219195,
"step": 2400
},
{
"epoch": 0.5314486083716853,
"grad_norm": 0.48715198040008545,
"learning_rate": 4.893951079770825e-06,
"loss": 0.0853,
"step": 2425
},
{
"epoch": 0.5369274600043831,
"grad_norm": 0.5139690041542053,
"learning_rate": 4.89257382106655e-06,
"loss": 0.0874,
"step": 2450
},
{
"epoch": 0.5424063116370809,
"grad_norm": 0.49623942375183105,
"learning_rate": 4.891196562362275e-06,
"loss": 0.0893,
"step": 2475
},
{
"epoch": 0.5478851632697787,
"grad_norm": 0.5240609645843506,
"learning_rate": 4.889819303657999e-06,
"loss": 0.0857,
"step": 2500
},
{
"epoch": 0.5533640149024764,
"grad_norm": 0.5464821457862854,
"learning_rate": 4.8884420449537245e-06,
"loss": 0.0858,
"step": 2525
},
{
"epoch": 0.5588428665351742,
"grad_norm": 0.49569082260131836,
"learning_rate": 4.88706478624945e-06,
"loss": 0.085,
"step": 2550
},
{
"epoch": 0.564321718167872,
"grad_norm": 0.5617781281471252,
"learning_rate": 4.885687527545174e-06,
"loss": 0.0861,
"step": 2575
},
{
"epoch": 0.5698005698005698,
"grad_norm": 0.538022518157959,
"learning_rate": 4.884310268840899e-06,
"loss": 0.0868,
"step": 2600
},
{
"epoch": 0.5752794214332676,
"grad_norm": 0.4421217143535614,
"learning_rate": 4.882933010136624e-06,
"loss": 0.085,
"step": 2625
},
{
"epoch": 0.5807582730659654,
"grad_norm": 0.4933975040912628,
"learning_rate": 4.881555751432349e-06,
"loss": 0.0836,
"step": 2650
},
{
"epoch": 0.5862371246986632,
"grad_norm": 0.5269121527671814,
"learning_rate": 4.880178492728075e-06,
"loss": 0.0855,
"step": 2675
},
{
"epoch": 0.591715976331361,
"grad_norm": 0.49818453192710876,
"learning_rate": 4.8788012340238e-06,
"loss": 0.0818,
"step": 2700
},
{
"epoch": 0.591715976331361,
"eval_loss": 0.11333612352609634,
"eval_runtime": 1354.7808,
"eval_samples_per_second": 2.458,
"eval_steps_per_second": 0.052,
"eval_wer": 14.018049833235235,
"step": 2700
},
{
"epoch": 0.5971948279640588,
"grad_norm": 0.5359761714935303,
"learning_rate": 4.877423975319525e-06,
"loss": 0.0803,
"step": 2725
},
{
"epoch": 0.6026736795967566,
"grad_norm": 0.5219433903694153,
"learning_rate": 4.876046716615249e-06,
"loss": 0.0835,
"step": 2750
},
{
"epoch": 0.6081525312294543,
"grad_norm": 0.4877767562866211,
"learning_rate": 4.874669457910974e-06,
"loss": 0.0827,
"step": 2775
},
{
"epoch": 0.6136313828621521,
"grad_norm": 0.47034549713134766,
"learning_rate": 4.8732921992066995e-06,
"loss": 0.0803,
"step": 2800
},
{
"epoch": 0.6191102344948499,
"grad_norm": 0.5331267714500427,
"learning_rate": 4.871914940502425e-06,
"loss": 0.0827,
"step": 2825
},
{
"epoch": 0.6245890861275477,
"grad_norm": 0.5360026955604553,
"learning_rate": 4.870537681798149e-06,
"loss": 0.0848,
"step": 2850
},
{
"epoch": 0.6300679377602455,
"grad_norm": 0.5023711323738098,
"learning_rate": 4.869160423093874e-06,
"loss": 0.0816,
"step": 2875
},
{
"epoch": 0.6355467893929433,
"grad_norm": 0.43558841943740845,
"learning_rate": 4.867783164389599e-06,
"loss": 0.0823,
"step": 2900
},
{
"epoch": 0.6410256410256411,
"grad_norm": 0.52950519323349,
"learning_rate": 4.8664059056853244e-06,
"loss": 0.0832,
"step": 2925
},
{
"epoch": 0.6465044926583388,
"grad_norm": 0.49947696924209595,
"learning_rate": 4.86502864698105e-06,
"loss": 0.0825,
"step": 2950
},
{
"epoch": 0.6519833442910365,
"grad_norm": 0.4842943549156189,
"learning_rate": 4.863651388276775e-06,
"loss": 0.0821,
"step": 2975
},
{
"epoch": 0.6574621959237343,
"grad_norm": 0.6517378091812134,
"learning_rate": 4.8622741295725e-06,
"loss": 0.0945,
"step": 3000
},
{
"epoch": 0.6574621959237343,
"eval_loss": 0.11205233633518219,
"eval_runtime": 1330.2589,
"eval_samples_per_second": 2.503,
"eval_steps_per_second": 0.053,
"eval_wer": 13.976502902514742,
"step": 3000
},
{
"epoch": 0.6629410475564321,
"grad_norm": 0.5949074029922485,
"learning_rate": 4.860896870868224e-06,
"loss": 0.1253,
"step": 3025
},
{
"epoch": 0.6684198991891299,
"grad_norm": 0.5628743171691895,
"learning_rate": 4.859519612163949e-06,
"loss": 0.1232,
"step": 3050
},
{
"epoch": 0.6738987508218277,
"grad_norm": 0.5839057564735413,
"learning_rate": 4.8581423534596745e-06,
"loss": 0.1228,
"step": 3075
},
{
"epoch": 0.6793776024545255,
"grad_norm": 0.5640609860420227,
"learning_rate": 4.856765094755399e-06,
"loss": 0.1181,
"step": 3100
},
{
"epoch": 0.6848564540872233,
"grad_norm": 0.6778563261032104,
"learning_rate": 4.855387836051124e-06,
"loss": 0.1208,
"step": 3125
},
{
"epoch": 0.6903353057199211,
"grad_norm": 0.603071928024292,
"learning_rate": 4.854010577346849e-06,
"loss": 0.1193,
"step": 3150
},
{
"epoch": 0.6958141573526189,
"grad_norm": 0.6698121428489685,
"learning_rate": 4.852633318642574e-06,
"loss": 0.1209,
"step": 3175
},
{
"epoch": 0.7012930089853167,
"grad_norm": 0.5631791353225708,
"learning_rate": 4.8512560599382995e-06,
"loss": 0.1223,
"step": 3200
},
{
"epoch": 0.7067718606180144,
"grad_norm": 0.5904573798179626,
"learning_rate": 4.849878801234025e-06,
"loss": 0.119,
"step": 3225
},
{
"epoch": 0.7122507122507122,
"grad_norm": 0.6524720788002014,
"learning_rate": 4.84850154252975e-06,
"loss": 0.1194,
"step": 3250
},
{
"epoch": 0.71772956388341,
"grad_norm": 0.6679468154907227,
"learning_rate": 4.847124283825474e-06,
"loss": 0.1224,
"step": 3275
},
{
"epoch": 0.7232084155161078,
"grad_norm": 0.5249156951904297,
"learning_rate": 4.845747025121199e-06,
"loss": 0.1193,
"step": 3300
},
{
"epoch": 0.7232084155161078,
"eval_loss": 0.10731059312820435,
"eval_runtime": 1334.5691,
"eval_samples_per_second": 2.495,
"eval_steps_per_second": 0.052,
"eval_wer": 13.910143221502844,
"step": 3300
},
{
"epoch": 0.7286872671488056,
"grad_norm": 0.6170015931129456,
"learning_rate": 4.844369766416924e-06,
"loss": 0.118,
"step": 3325
},
{
"epoch": 0.7341661187815034,
"grad_norm": 0.622870922088623,
"learning_rate": 4.8429925077126496e-06,
"loss": 0.1213,
"step": 3350
},
{
"epoch": 0.7396449704142012,
"grad_norm": 0.6220366358757019,
"learning_rate": 4.841615249008374e-06,
"loss": 0.1199,
"step": 3375
},
{
"epoch": 0.745123822046899,
"grad_norm": 0.6058914661407471,
"learning_rate": 4.840237990304099e-06,
"loss": 0.1177,
"step": 3400
},
{
"epoch": 0.7506026736795968,
"grad_norm": 0.618475079536438,
"learning_rate": 4.838860731599824e-06,
"loss": 0.1193,
"step": 3425
},
{
"epoch": 0.7560815253122946,
"grad_norm": 0.6013332009315491,
"learning_rate": 4.8374834728955485e-06,
"loss": 0.1185,
"step": 3450
},
{
"epoch": 0.7615603769449923,
"grad_norm": 0.5604269504547119,
"learning_rate": 4.836106214191274e-06,
"loss": 0.1169,
"step": 3475
},
{
"epoch": 0.7670392285776901,
"grad_norm": 0.5583498477935791,
"learning_rate": 4.834728955486999e-06,
"loss": 0.1205,
"step": 3500
},
{
"epoch": 0.7725180802103879,
"grad_norm": 0.5525631904602051,
"learning_rate": 4.833351696782724e-06,
"loss": 0.1192,
"step": 3525
},
{
"epoch": 0.7779969318430857,
"grad_norm": 0.5905235409736633,
"learning_rate": 4.831974438078449e-06,
"loss": 0.1192,
"step": 3550
},
{
"epoch": 0.7834757834757835,
"grad_norm": 0.5701056122779846,
"learning_rate": 4.830597179374174e-06,
"loss": 0.1189,
"step": 3575
},
{
"epoch": 0.7889546351084813,
"grad_norm": 0.5544924139976501,
"learning_rate": 4.829219920669899e-06,
"loss": 0.1179,
"step": 3600
},
{
"epoch": 0.7889546351084813,
"eval_loss": 0.10721833258867264,
"eval_runtime": 1334.3717,
"eval_samples_per_second": 2.496,
"eval_steps_per_second": 0.052,
"eval_wer": 13.76934528961673,
"step": 3600
}
],
"logging_steps": 25,
"max_steps": 91260,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.527210695946341e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}