|
{ |
|
"best_metric": 13.76934528961673, |
|
"best_model_checkpoint": "/speechbrain/data/whis/whisper-medium-ar-aug30-cont3/checkpoint-3600", |
|
"epoch": 0.7889546351084813, |
|
"eval_steps": 300, |
|
"global_step": 3600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005478851632697786, |
|
"grad_norm": 0.5829852819442749, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"loss": 0.1288, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.010957703265395573, |
|
"grad_norm": 0.5360992550849915, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.13, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01643655489809336, |
|
"grad_norm": 0.5283806324005127, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.1272, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.021915406530791146, |
|
"grad_norm": 0.5019442439079285, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1282, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.027394258163488932, |
|
"grad_norm": 0.4962950050830841, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.1225, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03287310979618672, |
|
"grad_norm": 0.4953068792819977, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.1201, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03835196142888451, |
|
"grad_norm": 0.5007308125495911, |
|
"learning_rate": 1.75e-06, |
|
"loss": 0.1216, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.04383081306158229, |
|
"grad_norm": 0.5254662036895752, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.1179, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04930966469428008, |
|
"grad_norm": 0.45517662167549133, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.1171, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.054788516326977864, |
|
"grad_norm": 0.4700426459312439, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1135, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.060267367959675654, |
|
"grad_norm": 0.48102590441703796, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"loss": 0.1108, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.06574621959237344, |
|
"grad_norm": 0.46885138750076294, |
|
"learning_rate": 3e-06, |
|
"loss": 0.111, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06574621959237344, |
|
"eval_loss": 0.10784981399774551, |
|
"eval_runtime": 1329.228, |
|
"eval_samples_per_second": 2.505, |
|
"eval_steps_per_second": 0.053, |
|
"eval_wer": 13.824164156539604, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07122507122507123, |
|
"grad_norm": 0.5266237854957581, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"loss": 0.1135, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.07670392285776902, |
|
"grad_norm": 0.5073336958885193, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.1102, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08218277449046679, |
|
"grad_norm": 0.4480571150779724, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.1083, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.08766162612316458, |
|
"grad_norm": 0.5027902126312256, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.1056, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09314047775586237, |
|
"grad_norm": 0.5120052695274353, |
|
"learning_rate": 4.25e-06, |
|
"loss": 0.1039, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.09861932938856016, |
|
"grad_norm": 0.48518097400665283, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.1043, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.10409818102125794, |
|
"grad_norm": 0.5022168159484863, |
|
"learning_rate": 4.75e-06, |
|
"loss": 0.1037, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.10957703265395573, |
|
"grad_norm": 0.4528847932815552, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0985, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11505588428665352, |
|
"grad_norm": 0.49148285388946533, |
|
"learning_rate": 4.9986227412957256e-06, |
|
"loss": 0.1004, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.12053473591935131, |
|
"grad_norm": 0.4991222321987152, |
|
"learning_rate": 4.99724548259145e-06, |
|
"loss": 0.1, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.12601358755204908, |
|
"grad_norm": 0.4580934941768646, |
|
"learning_rate": 4.995868223887175e-06, |
|
"loss": 0.0961, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.13149243918474687, |
|
"grad_norm": 0.4897126853466034, |
|
"learning_rate": 4.9944909651829e-06, |
|
"loss": 0.1, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13149243918474687, |
|
"eval_loss": 0.1092229038476944, |
|
"eval_runtime": 1332.663, |
|
"eval_samples_per_second": 2.499, |
|
"eval_steps_per_second": 0.053, |
|
"eval_wer": 13.810892220337223, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13697129081744466, |
|
"grad_norm": 0.47739720344543457, |
|
"learning_rate": 4.993113706478625e-06, |
|
"loss": 0.1049, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.14245014245014245, |
|
"grad_norm": 0.5223620533943176, |
|
"learning_rate": 4.9917364477743505e-06, |
|
"loss": 0.1001, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.14792899408284024, |
|
"grad_norm": 0.4523641765117645, |
|
"learning_rate": 4.990359189070076e-06, |
|
"loss": 0.0986, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.15340784571553803, |
|
"grad_norm": 0.47216111421585083, |
|
"learning_rate": 4.988981930365801e-06, |
|
"loss": 0.1015, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.15888669734823582, |
|
"grad_norm": 0.5023489594459534, |
|
"learning_rate": 4.987604671661525e-06, |
|
"loss": 0.0987, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.16436554898093358, |
|
"grad_norm": 0.5276343822479248, |
|
"learning_rate": 4.98622741295725e-06, |
|
"loss": 0.1003, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.16984440061363137, |
|
"grad_norm": 0.49953222274780273, |
|
"learning_rate": 4.9848501542529754e-06, |
|
"loss": 0.0983, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.17532325224632916, |
|
"grad_norm": 0.45781460404396057, |
|
"learning_rate": 4.983472895548701e-06, |
|
"loss": 0.0978, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.18080210387902695, |
|
"grad_norm": 0.4533933997154236, |
|
"learning_rate": 4.982095636844425e-06, |
|
"loss": 0.0984, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.18628095551172474, |
|
"grad_norm": 0.5115811824798584, |
|
"learning_rate": 4.98071837814015e-06, |
|
"loss": 0.097, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.19175980714442253, |
|
"grad_norm": 0.5033650994300842, |
|
"learning_rate": 4.979341119435875e-06, |
|
"loss": 0.0985, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.19723865877712032, |
|
"grad_norm": 0.4879961609840393, |
|
"learning_rate": 4.9779638607316e-06, |
|
"loss": 0.0968, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19723865877712032, |
|
"eval_loss": 0.10996146500110626, |
|
"eval_runtime": 1382.8033, |
|
"eval_samples_per_second": 2.408, |
|
"eval_steps_per_second": 0.051, |
|
"eval_wer": 13.83570497062863, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2027175104098181, |
|
"grad_norm": 0.4946504831314087, |
|
"learning_rate": 4.9765866020273255e-06, |
|
"loss": 0.0975, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.20819636204251588, |
|
"grad_norm": 0.5116554498672485, |
|
"learning_rate": 4.975209343323051e-06, |
|
"loss": 0.0973, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.21367521367521367, |
|
"grad_norm": 0.5216028094291687, |
|
"learning_rate": 4.973832084618776e-06, |
|
"loss": 0.097, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.21915406530791146, |
|
"grad_norm": 0.5032294392585754, |
|
"learning_rate": 4.9724548259145e-06, |
|
"loss": 0.0909, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22463291694060925, |
|
"grad_norm": 0.526467502117157, |
|
"learning_rate": 4.971077567210225e-06, |
|
"loss": 0.0969, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.23011176857330704, |
|
"grad_norm": 0.488610178232193, |
|
"learning_rate": 4.9697003085059505e-06, |
|
"loss": 0.098, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.23559062020600482, |
|
"grad_norm": 0.47755196690559387, |
|
"learning_rate": 4.968323049801675e-06, |
|
"loss": 0.0931, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.24106947183870261, |
|
"grad_norm": 0.5348175168037415, |
|
"learning_rate": 4.9669457910974e-06, |
|
"loss": 0.0952, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2465483234714004, |
|
"grad_norm": 0.48804572224617004, |
|
"learning_rate": 4.965568532393125e-06, |
|
"loss": 0.0954, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.25202717510409817, |
|
"grad_norm": 0.48517024517059326, |
|
"learning_rate": 4.96419127368885e-06, |
|
"loss": 0.0958, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.25750602673679596, |
|
"grad_norm": 0.5918833017349243, |
|
"learning_rate": 4.9628140149845745e-06, |
|
"loss": 0.0958, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.26298487836949375, |
|
"grad_norm": 0.5274895429611206, |
|
"learning_rate": 4.9614367562803e-06, |
|
"loss": 0.0967, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.26298487836949375, |
|
"eval_loss": 0.11020273715257645, |
|
"eval_runtime": 1336.5505, |
|
"eval_samples_per_second": 2.491, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 14.059019723251279, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.26846373000219154, |
|
"grad_norm": 0.4892116189002991, |
|
"learning_rate": 4.960059497576025e-06, |
|
"loss": 0.0929, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.2739425816348893, |
|
"grad_norm": 0.4998278319835663, |
|
"learning_rate": 4.95868223887175e-06, |
|
"loss": 0.0948, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2794214332675871, |
|
"grad_norm": 0.5273219347000122, |
|
"learning_rate": 4.957304980167475e-06, |
|
"loss": 0.0907, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.2849002849002849, |
|
"grad_norm": 0.47056299448013306, |
|
"learning_rate": 4.9559277214632e-06, |
|
"loss": 0.091, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2903791365329827, |
|
"grad_norm": 0.4882357716560364, |
|
"learning_rate": 4.9545504627589255e-06, |
|
"loss": 0.0941, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.2958579881656805, |
|
"grad_norm": 0.5165619850158691, |
|
"learning_rate": 4.95317320405465e-06, |
|
"loss": 0.0921, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3013368397983783, |
|
"grad_norm": 0.4642132520675659, |
|
"learning_rate": 4.951795945350375e-06, |
|
"loss": 0.0914, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.30681569143107607, |
|
"grad_norm": 0.5326189398765564, |
|
"learning_rate": 4.9504186866461e-06, |
|
"loss": 0.0959, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.31229454306377386, |
|
"grad_norm": 0.44957414269447327, |
|
"learning_rate": 4.949041427941824e-06, |
|
"loss": 0.0871, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.31777339469647164, |
|
"grad_norm": 0.4865795373916626, |
|
"learning_rate": 4.9476641692375496e-06, |
|
"loss": 0.0891, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3232522463291694, |
|
"grad_norm": 0.49055206775665283, |
|
"learning_rate": 4.946286910533275e-06, |
|
"loss": 0.0953, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.32873109796186717, |
|
"grad_norm": 0.49437183141708374, |
|
"learning_rate": 4.944909651829e-06, |
|
"loss": 0.0896, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.32873109796186717, |
|
"eval_loss": 0.11099947988986969, |
|
"eval_runtime": 1334.6828, |
|
"eval_samples_per_second": 2.495, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 13.836282011333079, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.33420994959456496, |
|
"grad_norm": 0.5322751998901367, |
|
"learning_rate": 4.943532393124725e-06, |
|
"loss": 0.0929, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.33968880122726275, |
|
"grad_norm": 0.5024107098579407, |
|
"learning_rate": 4.94215513442045e-06, |
|
"loss": 0.0922, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.34516765285996054, |
|
"grad_norm": 0.4347039759159088, |
|
"learning_rate": 4.940777875716175e-06, |
|
"loss": 0.0908, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.35064650449265833, |
|
"grad_norm": 0.5164802074432373, |
|
"learning_rate": 4.9394006170119e-06, |
|
"loss": 0.0939, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3561253561253561, |
|
"grad_norm": 0.4986899793148041, |
|
"learning_rate": 4.938023358307625e-06, |
|
"loss": 0.0883, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.3616042077580539, |
|
"grad_norm": 0.5192301869392395, |
|
"learning_rate": 4.93664609960335e-06, |
|
"loss": 0.0915, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3670830593907517, |
|
"grad_norm": 0.5347697734832764, |
|
"learning_rate": 4.935268840899075e-06, |
|
"loss": 0.0884, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.3725619110234495, |
|
"grad_norm": 0.47178414463996887, |
|
"learning_rate": 4.9338915821947994e-06, |
|
"loss": 0.0922, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.3780407626561473, |
|
"grad_norm": 0.4868011772632599, |
|
"learning_rate": 4.932514323490525e-06, |
|
"loss": 0.0925, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.38351961428884507, |
|
"grad_norm": 0.491805762052536, |
|
"learning_rate": 4.93113706478625e-06, |
|
"loss": 0.091, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.38899846592154286, |
|
"grad_norm": 0.5111169219017029, |
|
"learning_rate": 4.929759806081975e-06, |
|
"loss": 0.0888, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.39447731755424065, |
|
"grad_norm": 0.4957449436187744, |
|
"learning_rate": 4.9283825473777e-06, |
|
"loss": 0.0907, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39447731755424065, |
|
"eval_loss": 0.11147266626358032, |
|
"eval_runtime": 1330.9042, |
|
"eval_samples_per_second": 2.502, |
|
"eval_steps_per_second": 0.053, |
|
"eval_wer": 14.133457974125493, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39995616918693844, |
|
"grad_norm": 0.46782732009887695, |
|
"learning_rate": 4.927005288673425e-06, |
|
"loss": 0.0882, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.4054350208196362, |
|
"grad_norm": 0.4959644079208374, |
|
"learning_rate": 4.92562802996915e-06, |
|
"loss": 0.0925, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.410913872452334, |
|
"grad_norm": 0.4934210479259491, |
|
"learning_rate": 4.924250771264875e-06, |
|
"loss": 0.0901, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.41639272408503175, |
|
"grad_norm": 0.520613968372345, |
|
"learning_rate": 4.9228735125606e-06, |
|
"loss": 0.0893, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.42187157571772954, |
|
"grad_norm": 0.48207858204841614, |
|
"learning_rate": 4.921496253856325e-06, |
|
"loss": 0.0918, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.42735042735042733, |
|
"grad_norm": 0.5212067365646362, |
|
"learning_rate": 4.920118995152049e-06, |
|
"loss": 0.0915, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4328292789831251, |
|
"grad_norm": 0.4570591449737549, |
|
"learning_rate": 4.9187417364477744e-06, |
|
"loss": 0.0879, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.4383081306158229, |
|
"grad_norm": 0.5075387954711914, |
|
"learning_rate": 4.9173644777435e-06, |
|
"loss": 0.0921, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4437869822485207, |
|
"grad_norm": 0.4904765784740448, |
|
"learning_rate": 4.915987219039225e-06, |
|
"loss": 0.0892, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.4492658338812185, |
|
"grad_norm": 0.4949191212654114, |
|
"learning_rate": 4.91460996033495e-06, |
|
"loss": 0.0909, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4547446855139163, |
|
"grad_norm": 0.5112493634223938, |
|
"learning_rate": 4.913232701630675e-06, |
|
"loss": 0.089, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.46022353714661407, |
|
"grad_norm": 0.47857844829559326, |
|
"learning_rate": 4.9118554429264e-06, |
|
"loss": 0.0901, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.46022353714661407, |
|
"eval_loss": 0.112032450735569, |
|
"eval_runtime": 1387.0176, |
|
"eval_samples_per_second": 2.401, |
|
"eval_steps_per_second": 0.05, |
|
"eval_wer": 13.988043716603768, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.46570238877931186, |
|
"grad_norm": 0.4768081307411194, |
|
"learning_rate": 4.9104781842221245e-06, |
|
"loss": 0.0874, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.47118124041200965, |
|
"grad_norm": 0.4740845859050751, |
|
"learning_rate": 4.90910092551785e-06, |
|
"loss": 0.0885, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.47666009204470744, |
|
"grad_norm": 0.4519156813621521, |
|
"learning_rate": 4.907723666813575e-06, |
|
"loss": 0.0867, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.48213894367740523, |
|
"grad_norm": 0.5068197250366211, |
|
"learning_rate": 4.9063464081093e-06, |
|
"loss": 0.0878, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.487617795310103, |
|
"grad_norm": 0.49033084511756897, |
|
"learning_rate": 4.904969149405024e-06, |
|
"loss": 0.0862, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.4930966469428008, |
|
"grad_norm": 0.4625925123691559, |
|
"learning_rate": 4.9035918907007495e-06, |
|
"loss": 0.0866, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.4985754985754986, |
|
"grad_norm": 0.5056318640708923, |
|
"learning_rate": 4.902214631996475e-06, |
|
"loss": 0.086, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.5040543502081963, |
|
"grad_norm": 0.46904438734054565, |
|
"learning_rate": 4.9008373732922e-06, |
|
"loss": 0.0836, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5095332018408941, |
|
"grad_norm": 0.5033324360847473, |
|
"learning_rate": 4.899460114587924e-06, |
|
"loss": 0.0879, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.5150120534735919, |
|
"grad_norm": 0.5081333518028259, |
|
"learning_rate": 4.898082855883649e-06, |
|
"loss": 0.0867, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5204909051062897, |
|
"grad_norm": 0.44954633712768555, |
|
"learning_rate": 4.896705597179374e-06, |
|
"loss": 0.0859, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.5259697567389875, |
|
"grad_norm": 0.5036991238594055, |
|
"learning_rate": 4.8953283384750996e-06, |
|
"loss": 0.0823, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5259697567389875, |
|
"eval_loss": 0.11307456344366074, |
|
"eval_runtime": 1331.1273, |
|
"eval_samples_per_second": 2.502, |
|
"eval_steps_per_second": 0.053, |
|
"eval_wer": 13.977079943219195, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5314486083716853, |
|
"grad_norm": 0.48715198040008545, |
|
"learning_rate": 4.893951079770825e-06, |
|
"loss": 0.0853, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.5369274600043831, |
|
"grad_norm": 0.5139690041542053, |
|
"learning_rate": 4.89257382106655e-06, |
|
"loss": 0.0874, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5424063116370809, |
|
"grad_norm": 0.49623942375183105, |
|
"learning_rate": 4.891196562362275e-06, |
|
"loss": 0.0893, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.5478851632697787, |
|
"grad_norm": 0.5240609645843506, |
|
"learning_rate": 4.889819303657999e-06, |
|
"loss": 0.0857, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5533640149024764, |
|
"grad_norm": 0.5464821457862854, |
|
"learning_rate": 4.8884420449537245e-06, |
|
"loss": 0.0858, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.5588428665351742, |
|
"grad_norm": 0.49569082260131836, |
|
"learning_rate": 4.88706478624945e-06, |
|
"loss": 0.085, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.564321718167872, |
|
"grad_norm": 0.5617781281471252, |
|
"learning_rate": 4.885687527545174e-06, |
|
"loss": 0.0861, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.5698005698005698, |
|
"grad_norm": 0.538022518157959, |
|
"learning_rate": 4.884310268840899e-06, |
|
"loss": 0.0868, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5752794214332676, |
|
"grad_norm": 0.4421217143535614, |
|
"learning_rate": 4.882933010136624e-06, |
|
"loss": 0.085, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.5807582730659654, |
|
"grad_norm": 0.4933975040912628, |
|
"learning_rate": 4.881555751432349e-06, |
|
"loss": 0.0836, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5862371246986632, |
|
"grad_norm": 0.5269121527671814, |
|
"learning_rate": 4.880178492728075e-06, |
|
"loss": 0.0855, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.591715976331361, |
|
"grad_norm": 0.49818453192710876, |
|
"learning_rate": 4.8788012340238e-06, |
|
"loss": 0.0818, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.591715976331361, |
|
"eval_loss": 0.11333612352609634, |
|
"eval_runtime": 1354.7808, |
|
"eval_samples_per_second": 2.458, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 14.018049833235235, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5971948279640588, |
|
"grad_norm": 0.5359761714935303, |
|
"learning_rate": 4.877423975319525e-06, |
|
"loss": 0.0803, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.6026736795967566, |
|
"grad_norm": 0.5219433903694153, |
|
"learning_rate": 4.876046716615249e-06, |
|
"loss": 0.0835, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6081525312294543, |
|
"grad_norm": 0.4877767562866211, |
|
"learning_rate": 4.874669457910974e-06, |
|
"loss": 0.0827, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.6136313828621521, |
|
"grad_norm": 0.47034549713134766, |
|
"learning_rate": 4.8732921992066995e-06, |
|
"loss": 0.0803, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6191102344948499, |
|
"grad_norm": 0.5331267714500427, |
|
"learning_rate": 4.871914940502425e-06, |
|
"loss": 0.0827, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.6245890861275477, |
|
"grad_norm": 0.5360026955604553, |
|
"learning_rate": 4.870537681798149e-06, |
|
"loss": 0.0848, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6300679377602455, |
|
"grad_norm": 0.5023711323738098, |
|
"learning_rate": 4.869160423093874e-06, |
|
"loss": 0.0816, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.6355467893929433, |
|
"grad_norm": 0.43558841943740845, |
|
"learning_rate": 4.867783164389599e-06, |
|
"loss": 0.0823, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6410256410256411, |
|
"grad_norm": 0.52950519323349, |
|
"learning_rate": 4.8664059056853244e-06, |
|
"loss": 0.0832, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.6465044926583388, |
|
"grad_norm": 0.49947696924209595, |
|
"learning_rate": 4.86502864698105e-06, |
|
"loss": 0.0825, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.6519833442910365, |
|
"grad_norm": 0.4842943549156189, |
|
"learning_rate": 4.863651388276775e-06, |
|
"loss": 0.0821, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.6574621959237343, |
|
"grad_norm": 0.6517378091812134, |
|
"learning_rate": 4.8622741295725e-06, |
|
"loss": 0.0945, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6574621959237343, |
|
"eval_loss": 0.11205233633518219, |
|
"eval_runtime": 1330.2589, |
|
"eval_samples_per_second": 2.503, |
|
"eval_steps_per_second": 0.053, |
|
"eval_wer": 13.976502902514742, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6629410475564321, |
|
"grad_norm": 0.5949074029922485, |
|
"learning_rate": 4.860896870868224e-06, |
|
"loss": 0.1253, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.6684198991891299, |
|
"grad_norm": 0.5628743171691895, |
|
"learning_rate": 4.859519612163949e-06, |
|
"loss": 0.1232, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.6738987508218277, |
|
"grad_norm": 0.5839057564735413, |
|
"learning_rate": 4.8581423534596745e-06, |
|
"loss": 0.1228, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.6793776024545255, |
|
"grad_norm": 0.5640609860420227, |
|
"learning_rate": 4.856765094755399e-06, |
|
"loss": 0.1181, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6848564540872233, |
|
"grad_norm": 0.6778563261032104, |
|
"learning_rate": 4.855387836051124e-06, |
|
"loss": 0.1208, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.6903353057199211, |
|
"grad_norm": 0.603071928024292, |
|
"learning_rate": 4.854010577346849e-06, |
|
"loss": 0.1193, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.6958141573526189, |
|
"grad_norm": 0.6698121428489685, |
|
"learning_rate": 4.852633318642574e-06, |
|
"loss": 0.1209, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.7012930089853167, |
|
"grad_norm": 0.5631791353225708, |
|
"learning_rate": 4.8512560599382995e-06, |
|
"loss": 0.1223, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7067718606180144, |
|
"grad_norm": 0.5904573798179626, |
|
"learning_rate": 4.849878801234025e-06, |
|
"loss": 0.119, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.7122507122507122, |
|
"grad_norm": 0.6524720788002014, |
|
"learning_rate": 4.84850154252975e-06, |
|
"loss": 0.1194, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.71772956388341, |
|
"grad_norm": 0.6679468154907227, |
|
"learning_rate": 4.847124283825474e-06, |
|
"loss": 0.1224, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.7232084155161078, |
|
"grad_norm": 0.5249156951904297, |
|
"learning_rate": 4.845747025121199e-06, |
|
"loss": 0.1193, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7232084155161078, |
|
"eval_loss": 0.10731059312820435, |
|
"eval_runtime": 1334.5691, |
|
"eval_samples_per_second": 2.495, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 13.910143221502844, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7286872671488056, |
|
"grad_norm": 0.6170015931129456, |
|
"learning_rate": 4.844369766416924e-06, |
|
"loss": 0.118, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.7341661187815034, |
|
"grad_norm": 0.622870922088623, |
|
"learning_rate": 4.8429925077126496e-06, |
|
"loss": 0.1213, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.7396449704142012, |
|
"grad_norm": 0.6220366358757019, |
|
"learning_rate": 4.841615249008374e-06, |
|
"loss": 0.1199, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.745123822046899, |
|
"grad_norm": 0.6058914661407471, |
|
"learning_rate": 4.840237990304099e-06, |
|
"loss": 0.1177, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7506026736795968, |
|
"grad_norm": 0.618475079536438, |
|
"learning_rate": 4.838860731599824e-06, |
|
"loss": 0.1193, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.7560815253122946, |
|
"grad_norm": 0.6013332009315491, |
|
"learning_rate": 4.8374834728955485e-06, |
|
"loss": 0.1185, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.7615603769449923, |
|
"grad_norm": 0.5604269504547119, |
|
"learning_rate": 4.836106214191274e-06, |
|
"loss": 0.1169, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.7670392285776901, |
|
"grad_norm": 0.5583498477935791, |
|
"learning_rate": 4.834728955486999e-06, |
|
"loss": 0.1205, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.7725180802103879, |
|
"grad_norm": 0.5525631904602051, |
|
"learning_rate": 4.833351696782724e-06, |
|
"loss": 0.1192, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.7779969318430857, |
|
"grad_norm": 0.5905235409736633, |
|
"learning_rate": 4.831974438078449e-06, |
|
"loss": 0.1192, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.7834757834757835, |
|
"grad_norm": 0.5701056122779846, |
|
"learning_rate": 4.830597179374174e-06, |
|
"loss": 0.1189, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.7889546351084813, |
|
"grad_norm": 0.5544924139976501, |
|
"learning_rate": 4.829219920669899e-06, |
|
"loss": 0.1179, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.7889546351084813, |
|
"eval_loss": 0.10721833258867264, |
|
"eval_runtime": 1334.3717, |
|
"eval_samples_per_second": 2.496, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 13.76934528961673, |
|
"step": 3600 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 91260, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.527210695946341e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|