{
"best_metric": 0.3662048876285553,
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-swagen-combined-25hrs-model/checkpoint-4000",
"epoch": 2.28504034761018,
"eval_steps": 200,
"global_step": 4600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012414649286157667,
"grad_norm": 128.67588806152344,
"learning_rate": 4.0000000000000003e-07,
"loss": 10.5484,
"step": 25
},
{
"epoch": 0.024829298572315334,
"grad_norm": 123.8255844116211,
"learning_rate": 9.000000000000001e-07,
"loss": 8.1464,
"step": 50
},
{
"epoch": 0.037243947858473,
"grad_norm": 79.1299057006836,
"learning_rate": 1.4000000000000001e-06,
"loss": 5.9439,
"step": 75
},
{
"epoch": 0.04965859714463067,
"grad_norm": 79.47651672363281,
"learning_rate": 1.9000000000000002e-06,
"loss": 4.1515,
"step": 100
},
{
"epoch": 0.06207324643078833,
"grad_norm": 68.2268295288086,
"learning_rate": 2.4000000000000003e-06,
"loss": 3.4012,
"step": 125
},
{
"epoch": 0.074487895716946,
"grad_norm": 81.23241424560547,
"learning_rate": 2.9e-06,
"loss": 3.3427,
"step": 150
},
{
"epoch": 0.08690254500310367,
"grad_norm": 66.99320983886719,
"learning_rate": 3.4000000000000005e-06,
"loss": 2.968,
"step": 175
},
{
"epoch": 0.09931719428926133,
"grad_norm": 78.05485534667969,
"learning_rate": 3.900000000000001e-06,
"loss": 2.8233,
"step": 200
},
{
"epoch": 0.09931719428926133,
"eval_loss": 0.804746150970459,
"eval_runtime": 563.0664,
"eval_samples_per_second": 1.931,
"eval_steps_per_second": 0.966,
"eval_wer": 0.489650974025974,
"step": 200
},
{
"epoch": 0.11173184357541899,
"grad_norm": 46.1437873840332,
"learning_rate": 4.4e-06,
"loss": 2.6012,
"step": 225
},
{
"epoch": 0.12414649286157665,
"grad_norm": 99.51728057861328,
"learning_rate": 4.9000000000000005e-06,
"loss": 2.2989,
"step": 250
},
{
"epoch": 0.13656114214773432,
"grad_norm": 49.41315460205078,
"learning_rate": 5.400000000000001e-06,
"loss": 2.2207,
"step": 275
},
{
"epoch": 0.148975791433892,
"grad_norm": 53.38062286376953,
"learning_rate": 5.9e-06,
"loss": 2.202,
"step": 300
},
{
"epoch": 0.16139044072004965,
"grad_norm": 49.83573913574219,
"learning_rate": 6.4000000000000006e-06,
"loss": 2.2695,
"step": 325
},
{
"epoch": 0.17380509000620734,
"grad_norm": 75.33547973632812,
"learning_rate": 6.9e-06,
"loss": 1.9705,
"step": 350
},
{
"epoch": 0.186219739292365,
"grad_norm": 55.35056686401367,
"learning_rate": 7.4e-06,
"loss": 2.0473,
"step": 375
},
{
"epoch": 0.19863438857852267,
"grad_norm": 46.99931335449219,
"learning_rate": 7.9e-06,
"loss": 1.9329,
"step": 400
},
{
"epoch": 0.19863438857852267,
"eval_loss": 0.6190668940544128,
"eval_runtime": 574.5315,
"eval_samples_per_second": 1.892,
"eval_steps_per_second": 0.947,
"eval_wer": 0.401075487012987,
"step": 400
},
{
"epoch": 0.21104903786468032,
"grad_norm": 57.05539321899414,
"learning_rate": 8.400000000000001e-06,
"loss": 1.8312,
"step": 425
},
{
"epoch": 0.22346368715083798,
"grad_norm": 53.04418182373047,
"learning_rate": 8.900000000000001e-06,
"loss": 1.8474,
"step": 450
},
{
"epoch": 0.23587833643699566,
"grad_norm": 53.90583801269531,
"learning_rate": 9.4e-06,
"loss": 1.9193,
"step": 475
},
{
"epoch": 0.2482929857231533,
"grad_norm": 51.17042922973633,
"learning_rate": 9.9e-06,
"loss": 1.725,
"step": 500
},
{
"epoch": 0.260707635009311,
"grad_norm": 42.38318634033203,
"learning_rate": 9.996660544331274e-06,
"loss": 1.8561,
"step": 525
},
{
"epoch": 0.27312228429546864,
"grad_norm": 51.599029541015625,
"learning_rate": 9.992486224745367e-06,
"loss": 1.9738,
"step": 550
},
{
"epoch": 0.2855369335816263,
"grad_norm": 59.115108489990234,
"learning_rate": 9.98831190515946e-06,
"loss": 1.6793,
"step": 575
},
{
"epoch": 0.297951582867784,
"grad_norm": 42.64860534667969,
"learning_rate": 9.984137585573552e-06,
"loss": 1.6927,
"step": 600
},
{
"epoch": 0.297951582867784,
"eval_loss": 0.5420816540718079,
"eval_runtime": 589.5719,
"eval_samples_per_second": 1.844,
"eval_steps_per_second": 0.923,
"eval_wer": 0.37905844155844154,
"step": 600
},
{
"epoch": 0.31036623215394166,
"grad_norm": 44.46907043457031,
"learning_rate": 9.979963265987644e-06,
"loss": 1.6992,
"step": 625
},
{
"epoch": 0.3227808814400993,
"grad_norm": 38.840396881103516,
"learning_rate": 9.975788946401737e-06,
"loss": 1.5042,
"step": 650
},
{
"epoch": 0.33519553072625696,
"grad_norm": 46.88064956665039,
"learning_rate": 9.97161462681583e-06,
"loss": 1.7753,
"step": 675
},
{
"epoch": 0.34761018001241467,
"grad_norm": 44.91327667236328,
"learning_rate": 9.967440307229922e-06,
"loss": 1.7618,
"step": 700
},
{
"epoch": 0.3600248292985723,
"grad_norm": 42.24628448486328,
"learning_rate": 9.963265987644016e-06,
"loss": 1.6682,
"step": 725
},
{
"epoch": 0.37243947858473,
"grad_norm": 45.74182891845703,
"learning_rate": 9.959091668058107e-06,
"loss": 1.5824,
"step": 750
},
{
"epoch": 0.38485412787088763,
"grad_norm": 25.388633728027344,
"learning_rate": 9.954917348472199e-06,
"loss": 1.6692,
"step": 775
},
{
"epoch": 0.39726877715704534,
"grad_norm": 33.251548767089844,
"learning_rate": 9.950743028886292e-06,
"loss": 1.6183,
"step": 800
},
{
"epoch": 0.39726877715704534,
"eval_loss": 0.48888257145881653,
"eval_runtime": 577.1733,
"eval_samples_per_second": 1.883,
"eval_steps_per_second": 0.943,
"eval_wer": 0.3210227272727273,
"step": 800
},
{
"epoch": 0.409683426443203,
"grad_norm": 30.38732147216797,
"learning_rate": 9.946568709300385e-06,
"loss": 1.4884,
"step": 825
},
{
"epoch": 0.42209807572936064,
"grad_norm": 48.94175338745117,
"learning_rate": 9.942394389714477e-06,
"loss": 1.4615,
"step": 850
},
{
"epoch": 0.4345127250155183,
"grad_norm": 29.04236602783203,
"learning_rate": 9.93822007012857e-06,
"loss": 1.5201,
"step": 875
},
{
"epoch": 0.44692737430167595,
"grad_norm": 41.91320037841797,
"learning_rate": 9.934045750542662e-06,
"loss": 1.5147,
"step": 900
},
{
"epoch": 0.45934202358783366,
"grad_norm": 40.610572814941406,
"learning_rate": 9.929871430956755e-06,
"loss": 1.4561,
"step": 925
},
{
"epoch": 0.4717566728739913,
"grad_norm": 33.01325988769531,
"learning_rate": 9.925697111370847e-06,
"loss": 1.3772,
"step": 950
},
{
"epoch": 0.48417132216014896,
"grad_norm": 40.93734359741211,
"learning_rate": 9.92152279178494e-06,
"loss": 1.549,
"step": 975
},
{
"epoch": 0.4965859714463066,
"grad_norm": 41.81599044799805,
"learning_rate": 9.917348472199032e-06,
"loss": 1.4431,
"step": 1000
},
{
"epoch": 0.4965859714463066,
"eval_loss": 0.4683995544910431,
"eval_runtime": 563.8925,
"eval_samples_per_second": 1.928,
"eval_steps_per_second": 0.965,
"eval_wer": 0.28662743506493504,
"step": 1000
},
{
"epoch": 0.5090006207324643,
"grad_norm": 23.732839584350586,
"learning_rate": 9.913174152613125e-06,
"loss": 1.2911,
"step": 1025
},
{
"epoch": 0.521415270018622,
"grad_norm": 35.39672088623047,
"learning_rate": 9.908999833027217e-06,
"loss": 1.2753,
"step": 1050
},
{
"epoch": 0.5338299193047796,
"grad_norm": 20.741168975830078,
"learning_rate": 9.90482551344131e-06,
"loss": 1.4464,
"step": 1075
},
{
"epoch": 0.5462445685909373,
"grad_norm": 44.05943298339844,
"learning_rate": 9.900651193855404e-06,
"loss": 1.2189,
"step": 1100
},
{
"epoch": 0.5586592178770949,
"grad_norm": 30.3934268951416,
"learning_rate": 9.896476874269495e-06,
"loss": 1.357,
"step": 1125
},
{
"epoch": 0.5710738671632526,
"grad_norm": 39.36647415161133,
"learning_rate": 9.892302554683587e-06,
"loss": 1.3864,
"step": 1150
},
{
"epoch": 0.5834885164494104,
"grad_norm": 39.50497055053711,
"learning_rate": 9.88812823509768e-06,
"loss": 1.5879,
"step": 1175
},
{
"epoch": 0.595903165735568,
"grad_norm": 52.04657745361328,
"learning_rate": 9.883953915511772e-06,
"loss": 1.4117,
"step": 1200
},
{
"epoch": 0.595903165735568,
"eval_loss": 0.42576098442077637,
"eval_runtime": 574.3755,
"eval_samples_per_second": 1.892,
"eval_steps_per_second": 0.947,
"eval_wer": 0.2650162337662338,
"step": 1200
},
{
"epoch": 0.6083178150217257,
"grad_norm": 41.47892761230469,
"learning_rate": 9.879779595925865e-06,
"loss": 1.2806,
"step": 1225
},
{
"epoch": 0.6207324643078833,
"grad_norm": 35.136695861816406,
"learning_rate": 9.875605276339958e-06,
"loss": 1.2739,
"step": 1250
},
{
"epoch": 0.633147113594041,
"grad_norm": 29.484039306640625,
"learning_rate": 9.87143095675405e-06,
"loss": 1.1364,
"step": 1275
},
{
"epoch": 0.6455617628801986,
"grad_norm": 47.20607376098633,
"learning_rate": 9.867256637168142e-06,
"loss": 1.1565,
"step": 1300
},
{
"epoch": 0.6579764121663563,
"grad_norm": 44.51639938354492,
"learning_rate": 9.863082317582235e-06,
"loss": 1.2704,
"step": 1325
},
{
"epoch": 0.6703910614525139,
"grad_norm": 56.79221725463867,
"learning_rate": 9.858907997996328e-06,
"loss": 1.3655,
"step": 1350
},
{
"epoch": 0.6828057107386716,
"grad_norm": 31.851566314697266,
"learning_rate": 9.85473367841042e-06,
"loss": 1.2962,
"step": 1375
},
{
"epoch": 0.6952203600248293,
"grad_norm": 48.65141677856445,
"learning_rate": 9.850559358824512e-06,
"loss": 1.2699,
"step": 1400
},
{
"epoch": 0.6952203600248293,
"eval_loss": 0.4222487807273865,
"eval_runtime": 572.8012,
"eval_samples_per_second": 1.898,
"eval_steps_per_second": 0.95,
"eval_wer": 0.26653814935064934,
"step": 1400
},
{
"epoch": 0.707635009310987,
"grad_norm": 32.21327209472656,
"learning_rate": 9.846385039238605e-06,
"loss": 1.1561,
"step": 1425
},
{
"epoch": 0.7200496585971446,
"grad_norm": 38.31489181518555,
"learning_rate": 9.842210719652696e-06,
"loss": 1.3146,
"step": 1450
},
{
"epoch": 0.7324643078833023,
"grad_norm": 56.665260314941406,
"learning_rate": 9.83803640006679e-06,
"loss": 1.3184,
"step": 1475
},
{
"epoch": 0.74487895716946,
"grad_norm": 49.64814758300781,
"learning_rate": 9.833862080480883e-06,
"loss": 1.0521,
"step": 1500
},
{
"epoch": 0.7572936064556176,
"grad_norm": 32.33070373535156,
"learning_rate": 9.829687760894975e-06,
"loss": 1.2677,
"step": 1525
},
{
"epoch": 0.7697082557417753,
"grad_norm": 27.896947860717773,
"learning_rate": 9.825513441309066e-06,
"loss": 1.3059,
"step": 1550
},
{
"epoch": 0.7821229050279329,
"grad_norm": 28.060487747192383,
"learning_rate": 9.82133912172316e-06,
"loss": 1.3901,
"step": 1575
},
{
"epoch": 0.7945375543140907,
"grad_norm": 32.01655578613281,
"learning_rate": 9.817164802137253e-06,
"loss": 1.0532,
"step": 1600
},
{
"epoch": 0.7945375543140907,
"eval_loss": 0.41084742546081543,
"eval_runtime": 564.5825,
"eval_samples_per_second": 1.925,
"eval_steps_per_second": 0.964,
"eval_wer": 0.2513189935064935,
"step": 1600
},
{
"epoch": 0.8069522036002483,
"grad_norm": 37.56877136230469,
"learning_rate": 9.812990482551345e-06,
"loss": 1.2314,
"step": 1625
},
{
"epoch": 0.819366852886406,
"grad_norm": 23.31650161743164,
"learning_rate": 9.808816162965438e-06,
"loss": 1.214,
"step": 1650
},
{
"epoch": 0.8317815021725636,
"grad_norm": 52.62869644165039,
"learning_rate": 9.80464184337953e-06,
"loss": 1.1148,
"step": 1675
},
{
"epoch": 0.8441961514587213,
"grad_norm": 37.902523040771484,
"learning_rate": 9.800467523793621e-06,
"loss": 1.1947,
"step": 1700
},
{
"epoch": 0.8566108007448789,
"grad_norm": 46.63554382324219,
"learning_rate": 9.796293204207715e-06,
"loss": 1.1841,
"step": 1725
},
{
"epoch": 0.8690254500310366,
"grad_norm": 24.407249450683594,
"learning_rate": 9.792118884621808e-06,
"loss": 1.0706,
"step": 1750
},
{
"epoch": 0.8814400993171942,
"grad_norm": 33.92270278930664,
"learning_rate": 9.7879445650359e-06,
"loss": 1.0771,
"step": 1775
},
{
"epoch": 0.8938547486033519,
"grad_norm": 36.15495681762695,
"learning_rate": 9.783770245449993e-06,
"loss": 1.0589,
"step": 1800
},
{
"epoch": 0.8938547486033519,
"eval_loss": 0.39820805191993713,
"eval_runtime": 559.8426,
"eval_samples_per_second": 1.942,
"eval_steps_per_second": 0.972,
"eval_wer": 0.22909902597402598,
"step": 1800
},
{
"epoch": 0.9062693978895097,
"grad_norm": 39.94309616088867,
"learning_rate": 9.779595925864084e-06,
"loss": 1.219,
"step": 1825
},
{
"epoch": 0.9186840471756673,
"grad_norm": 29.685474395751953,
"learning_rate": 9.775421606278178e-06,
"loss": 1.2091,
"step": 1850
},
{
"epoch": 0.931098696461825,
"grad_norm": 39.77056121826172,
"learning_rate": 9.771247286692271e-06,
"loss": 1.2096,
"step": 1875
},
{
"epoch": 0.9435133457479826,
"grad_norm": 22.495344161987305,
"learning_rate": 9.767072967106363e-06,
"loss": 1.1108,
"step": 1900
},
{
"epoch": 0.9559279950341403,
"grad_norm": 42.11180114746094,
"learning_rate": 9.762898647520454e-06,
"loss": 1.0949,
"step": 1925
},
{
"epoch": 0.9683426443202979,
"grad_norm": 41.73212432861328,
"learning_rate": 9.758724327934548e-06,
"loss": 1.2428,
"step": 1950
},
{
"epoch": 0.9807572936064556,
"grad_norm": 39.16131591796875,
"learning_rate": 9.75455000834864e-06,
"loss": 0.9964,
"step": 1975
},
{
"epoch": 0.9931719428926132,
"grad_norm": 27.52761459350586,
"learning_rate": 9.750375688762733e-06,
"loss": 1.1856,
"step": 2000
},
{
"epoch": 0.9931719428926132,
"eval_loss": 0.3853071331977844,
"eval_runtime": 565.6151,
"eval_samples_per_second": 1.922,
"eval_steps_per_second": 0.962,
"eval_wer": 0.23549107142857142,
"step": 2000
},
{
"epoch": 1.0059590316573557,
"grad_norm": 19.790531158447266,
"learning_rate": 9.746201369176826e-06,
"loss": 0.9702,
"step": 2025
},
{
"epoch": 1.0183736809435133,
"grad_norm": 24.30504035949707,
"learning_rate": 9.742027049590918e-06,
"loss": 0.6177,
"step": 2050
},
{
"epoch": 1.030788330229671,
"grad_norm": 25.81077003479004,
"learning_rate": 9.73785273000501e-06,
"loss": 0.5878,
"step": 2075
},
{
"epoch": 1.0432029795158286,
"grad_norm": 29.500877380371094,
"learning_rate": 9.733678410419102e-06,
"loss": 0.6152,
"step": 2100
},
{
"epoch": 1.0556176288019863,
"grad_norm": 18.39103889465332,
"learning_rate": 9.729504090833196e-06,
"loss": 0.5966,
"step": 2125
},
{
"epoch": 1.068032278088144,
"grad_norm": 42.394142150878906,
"learning_rate": 9.725329771247287e-06,
"loss": 0.6365,
"step": 2150
},
{
"epoch": 1.0804469273743016,
"grad_norm": 19.30755043029785,
"learning_rate": 9.72115545166138e-06,
"loss": 0.6584,
"step": 2175
},
{
"epoch": 1.0928615766604592,
"grad_norm": 22.643875122070312,
"learning_rate": 9.716981132075472e-06,
"loss": 0.6692,
"step": 2200
},
{
"epoch": 1.0928615766604592,
"eval_loss": 0.40007734298706055,
"eval_runtime": 581.1524,
"eval_samples_per_second": 1.87,
"eval_steps_per_second": 0.936,
"eval_wer": 0.2650162337662338,
"step": 2200
},
{
"epoch": 1.105276225946617,
"grad_norm": 28.355436325073242,
"learning_rate": 9.712806812489564e-06,
"loss": 0.6398,
"step": 2225
},
{
"epoch": 1.1176908752327748,
"grad_norm": 29.392656326293945,
"learning_rate": 9.708632492903657e-06,
"loss": 0.64,
"step": 2250
},
{
"epoch": 1.1301055245189324,
"grad_norm": 25.52250099182129,
"learning_rate": 9.70445817331775e-06,
"loss": 0.6339,
"step": 2275
},
{
"epoch": 1.14252017380509,
"grad_norm": 26.52411460876465,
"learning_rate": 9.700283853731842e-06,
"loss": 0.5372,
"step": 2300
},
{
"epoch": 1.1549348230912477,
"grad_norm": 26.201452255249023,
"learning_rate": 9.696109534145936e-06,
"loss": 0.5878,
"step": 2325
},
{
"epoch": 1.1673494723774054,
"grad_norm": 23.98987579345703,
"learning_rate": 9.691935214560027e-06,
"loss": 0.5349,
"step": 2350
},
{
"epoch": 1.179764121663563,
"grad_norm": 32.815521240234375,
"learning_rate": 9.68776089497412e-06,
"loss": 0.7508,
"step": 2375
},
{
"epoch": 1.1921787709497207,
"grad_norm": 23.12726593017578,
"learning_rate": 9.683586575388212e-06,
"loss": 0.6505,
"step": 2400
},
{
"epoch": 1.1921787709497207,
"eval_loss": 0.39191773533821106,
"eval_runtime": 571.3726,
"eval_samples_per_second": 1.902,
"eval_steps_per_second": 0.952,
"eval_wer": 0.23894074675324675,
"step": 2400
},
{
"epoch": 1.2045934202358783,
"grad_norm": 19.867704391479492,
"learning_rate": 9.679412255802305e-06,
"loss": 0.5807,
"step": 2425
},
{
"epoch": 1.217008069522036,
"grad_norm": 19.685293197631836,
"learning_rate": 9.675237936216397e-06,
"loss": 0.7044,
"step": 2450
},
{
"epoch": 1.2294227188081936,
"grad_norm": 28.70237159729004,
"learning_rate": 9.67106361663049e-06,
"loss": 0.6598,
"step": 2475
},
{
"epoch": 1.2418373680943513,
"grad_norm": 36.98805618286133,
"learning_rate": 9.666889297044582e-06,
"loss": 0.6079,
"step": 2500
},
{
"epoch": 1.254252017380509,
"grad_norm": 22.906494140625,
"learning_rate": 9.662714977458675e-06,
"loss": 0.7132,
"step": 2525
},
{
"epoch": 1.2666666666666666,
"grad_norm": 21.013233184814453,
"learning_rate": 9.658540657872769e-06,
"loss": 0.6346,
"step": 2550
},
{
"epoch": 1.2790813159528243,
"grad_norm": 22.889606475830078,
"learning_rate": 9.65436633828686e-06,
"loss": 0.5689,
"step": 2575
},
{
"epoch": 1.291495965238982,
"grad_norm": 21.3165225982666,
"learning_rate": 9.650192018700952e-06,
"loss": 0.6613,
"step": 2600
},
{
"epoch": 1.291495965238982,
"eval_loss": 0.3809148669242859,
"eval_runtime": 575.5999,
"eval_samples_per_second": 1.888,
"eval_steps_per_second": 0.945,
"eval_wer": 0.2385349025974026,
"step": 2600
},
{
"epoch": 1.3039106145251398,
"grad_norm": 31.75080108642578,
"learning_rate": 9.646017699115045e-06,
"loss": 0.6436,
"step": 2625
},
{
"epoch": 1.3163252638112972,
"grad_norm": 30.9864559173584,
"learning_rate": 9.641843379529137e-06,
"loss": 0.696,
"step": 2650
},
{
"epoch": 1.328739913097455,
"grad_norm": 30.82682991027832,
"learning_rate": 9.63766905994323e-06,
"loss": 0.5955,
"step": 2675
},
{
"epoch": 1.3411545623836125,
"grad_norm": 34.10749435424805,
"learning_rate": 9.633494740357322e-06,
"loss": 0.7117,
"step": 2700
},
{
"epoch": 1.3535692116697704,
"grad_norm": 30.104955673217773,
"learning_rate": 9.629320420771415e-06,
"loss": 0.5666,
"step": 2725
},
{
"epoch": 1.365983860955928,
"grad_norm": 23.225740432739258,
"learning_rate": 9.625146101185507e-06,
"loss": 0.5734,
"step": 2750
},
{
"epoch": 1.3783985102420857,
"grad_norm": 20.32614517211914,
"learning_rate": 9.6209717815996e-06,
"loss": 0.6535,
"step": 2775
},
{
"epoch": 1.3908131595282434,
"grad_norm": 23.999792098999023,
"learning_rate": 9.616797462013693e-06,
"loss": 0.6194,
"step": 2800
},
{
"epoch": 1.3908131595282434,
"eval_loss": 0.3873368799686432,
"eval_runtime": 568.9878,
"eval_samples_per_second": 1.91,
"eval_steps_per_second": 0.956,
"eval_wer": 0.23427353896103897,
"step": 2800
},
{
"epoch": 1.403227808814401,
"grad_norm": 18.715627670288086,
"learning_rate": 9.612623142427785e-06,
"loss": 0.5924,
"step": 2825
},
{
"epoch": 1.4156424581005587,
"grad_norm": 24.6026611328125,
"learning_rate": 9.608448822841877e-06,
"loss": 0.5588,
"step": 2850
},
{
"epoch": 1.4280571073867163,
"grad_norm": 32.74100875854492,
"learning_rate": 9.60427450325597e-06,
"loss": 0.6261,
"step": 2875
},
{
"epoch": 1.440471756672874,
"grad_norm": 31.3200740814209,
"learning_rate": 9.600100183670062e-06,
"loss": 0.756,
"step": 2900
},
{
"epoch": 1.4528864059590316,
"grad_norm": 19.404541015625,
"learning_rate": 9.595925864084155e-06,
"loss": 0.6082,
"step": 2925
},
{
"epoch": 1.4653010552451893,
"grad_norm": 16.61175537109375,
"learning_rate": 9.591751544498248e-06,
"loss": 0.6567,
"step": 2950
},
{
"epoch": 1.477715704531347,
"grad_norm": 22.71599006652832,
"learning_rate": 9.587744197695776e-06,
"loss": 0.6098,
"step": 2975
},
{
"epoch": 1.4901303538175046,
"grad_norm": 32.15653610229492,
"learning_rate": 9.583569878109869e-06,
"loss": 0.6358,
"step": 3000
},
{
"epoch": 1.4901303538175046,
"eval_loss": 0.38495373725891113,
"eval_runtime": 561.3182,
"eval_samples_per_second": 1.937,
"eval_steps_per_second": 0.969,
"eval_wer": 0.21418425324675325,
"step": 3000
},
{
"epoch": 1.5025450031036622,
"grad_norm": 22.268293380737305,
"learning_rate": 9.579395558523962e-06,
"loss": 0.5949,
"step": 3025
},
{
"epoch": 1.51495965238982,
"grad_norm": 28.58846092224121,
"learning_rate": 9.575221238938054e-06,
"loss": 0.6006,
"step": 3050
},
{
"epoch": 1.5273743016759775,
"grad_norm": 25.382551193237305,
"learning_rate": 9.571046919352145e-06,
"loss": 0.5811,
"step": 3075
},
{
"epoch": 1.5397889509621354,
"grad_norm": 34.780006408691406,
"learning_rate": 9.566872599766239e-06,
"loss": 0.5968,
"step": 3100
},
{
"epoch": 1.5522036002482928,
"grad_norm": 21.326889038085938,
"learning_rate": 9.562698280180332e-06,
"loss": 0.4749,
"step": 3125
},
{
"epoch": 1.5646182495344507,
"grad_norm": 27.90545654296875,
"learning_rate": 9.558523960594424e-06,
"loss": 0.6064,
"step": 3150
},
{
"epoch": 1.5770328988206082,
"grad_norm": 22.328035354614258,
"learning_rate": 9.554349641008517e-06,
"loss": 0.5755,
"step": 3175
},
{
"epoch": 1.589447548106766,
"grad_norm": 23.400901794433594,
"learning_rate": 9.550175321422609e-06,
"loss": 0.6208,
"step": 3200
},
{
"epoch": 1.589447548106766,
"eval_loss": 0.37794527411460876,
"eval_runtime": 565.9599,
"eval_samples_per_second": 1.921,
"eval_steps_per_second": 0.961,
"eval_wer": 0.23883928571428573,
"step": 3200
},
{
"epoch": 1.6018621973929237,
"grad_norm": 21.570287704467773,
"learning_rate": 9.5460010018367e-06,
"loss": 0.5788,
"step": 3225
},
{
"epoch": 1.6142768466790813,
"grad_norm": 27.813451766967773,
"learning_rate": 9.541826682250794e-06,
"loss": 0.6945,
"step": 3250
},
{
"epoch": 1.626691495965239,
"grad_norm": 30.955820083618164,
"learning_rate": 9.537652362664887e-06,
"loss": 0.5379,
"step": 3275
},
{
"epoch": 1.6391061452513966,
"grad_norm": 20.53118133544922,
"learning_rate": 9.533478043078979e-06,
"loss": 0.6171,
"step": 3300
},
{
"epoch": 1.6515207945375543,
"grad_norm": 23.763132095336914,
"learning_rate": 9.529303723493072e-06,
"loss": 0.6021,
"step": 3325
},
{
"epoch": 1.663935443823712,
"grad_norm": 26.67987632751465,
"learning_rate": 9.525129403907164e-06,
"loss": 0.6727,
"step": 3350
},
{
"epoch": 1.6763500931098696,
"grad_norm": 25.991594314575195,
"learning_rate": 9.520955084321257e-06,
"loss": 0.7155,
"step": 3375
},
{
"epoch": 1.6887647423960273,
"grad_norm": 19.079315185546875,
"learning_rate": 9.51678076473535e-06,
"loss": 0.5932,
"step": 3400
},
{
"epoch": 1.6887647423960273,
"eval_loss": 0.3724534511566162,
"eval_runtime": 550.7846,
"eval_samples_per_second": 1.974,
"eval_steps_per_second": 0.988,
"eval_wer": 0.20403814935064934,
"step": 3400
},
{
"epoch": 1.7011793916821851,
"grad_norm": 18.52420997619629,
"learning_rate": 9.512606445149442e-06,
"loss": 0.6704,
"step": 3425
},
{
"epoch": 1.7135940409683426,
"grad_norm": 19.514951705932617,
"learning_rate": 9.508432125563533e-06,
"loss": 0.5896,
"step": 3450
},
{
"epoch": 1.7260086902545004,
"grad_norm": 28.89137840270996,
"learning_rate": 9.504257805977627e-06,
"loss": 0.5097,
"step": 3475
},
{
"epoch": 1.7384233395406579,
"grad_norm": 32.02205276489258,
"learning_rate": 9.500083486391718e-06,
"loss": 0.6217,
"step": 3500
},
{
"epoch": 1.7508379888268157,
"grad_norm": 36.85642623901367,
"learning_rate": 9.495909166805812e-06,
"loss": 0.666,
"step": 3525
},
{
"epoch": 1.7632526381129732,
"grad_norm": 37.10481262207031,
"learning_rate": 9.491734847219905e-06,
"loss": 0.5903,
"step": 3550
},
{
"epoch": 1.775667287399131,
"grad_norm": 19.526355743408203,
"learning_rate": 9.487560527633997e-06,
"loss": 0.5304,
"step": 3575
},
{
"epoch": 1.7880819366852885,
"grad_norm": 30.528167724609375,
"learning_rate": 9.483386208048088e-06,
"loss": 0.5797,
"step": 3600
},
{
"epoch": 1.7880819366852885,
"eval_loss": 0.3712182641029358,
"eval_runtime": 558.4122,
"eval_samples_per_second": 1.947,
"eval_steps_per_second": 0.974,
"eval_wer": 0.20921266233766234,
"step": 3600
},
{
"epoch": 1.8004965859714464,
"grad_norm": 29.263221740722656,
"learning_rate": 9.479211888462182e-06,
"loss": 0.6156,
"step": 3625
},
{
"epoch": 1.812911235257604,
"grad_norm": 23.728296279907227,
"learning_rate": 9.475037568876275e-06,
"loss": 0.6568,
"step": 3650
},
{
"epoch": 1.8253258845437617,
"grad_norm": 15.723759651184082,
"learning_rate": 9.470863249290367e-06,
"loss": 0.566,
"step": 3675
},
{
"epoch": 1.8377405338299193,
"grad_norm": 39.088584899902344,
"learning_rate": 9.466688929704458e-06,
"loss": 0.7007,
"step": 3700
},
{
"epoch": 1.850155183116077,
"grad_norm": 20.931364059448242,
"learning_rate": 9.462514610118551e-06,
"loss": 0.6843,
"step": 3725
},
{
"epoch": 1.8625698324022346,
"grad_norm": 23.179536819458008,
"learning_rate": 9.458340290532643e-06,
"loss": 0.6391,
"step": 3750
},
{
"epoch": 1.8749844816883923,
"grad_norm": 31.087736129760742,
"learning_rate": 9.454165970946736e-06,
"loss": 0.6611,
"step": 3775
},
{
"epoch": 1.88739913097455,
"grad_norm": 22.13474464416504,
"learning_rate": 9.44999165136083e-06,
"loss": 0.5707,
"step": 3800
},
{
"epoch": 1.88739913097455,
"eval_loss": 0.37375178933143616,
"eval_runtime": 565.1592,
"eval_samples_per_second": 1.923,
"eval_steps_per_second": 0.963,
"eval_wer": 0.23417207792207792,
"step": 3800
},
{
"epoch": 1.8998137802607076,
"grad_norm": 22.615114212036133,
"learning_rate": 9.445817331774921e-06,
"loss": 0.5573,
"step": 3825
},
{
"epoch": 1.9122284295468654,
"grad_norm": 32.943199157714844,
"learning_rate": 9.441643012189013e-06,
"loss": 0.6528,
"step": 3850
},
{
"epoch": 1.9246430788330229,
"grad_norm": 29.096609115600586,
"learning_rate": 9.437468692603106e-06,
"loss": 0.7014,
"step": 3875
},
{
"epoch": 1.9370577281191808,
"grad_norm": 18.50649642944336,
"learning_rate": 9.4332943730172e-06,
"loss": 0.5836,
"step": 3900
},
{
"epoch": 1.9494723774053382,
"grad_norm": 27.316129684448242,
"learning_rate": 9.429120053431291e-06,
"loss": 0.5993,
"step": 3925
},
{
"epoch": 1.961887026691496,
"grad_norm": 26.35407257080078,
"learning_rate": 9.424945733845385e-06,
"loss": 0.5874,
"step": 3950
},
{
"epoch": 1.9743016759776535,
"grad_norm": 23.183897018432617,
"learning_rate": 9.420771414259476e-06,
"loss": 0.6319,
"step": 3975
},
{
"epoch": 1.9867163252638114,
"grad_norm": 25.644729614257812,
"learning_rate": 9.416597094673568e-06,
"loss": 0.5928,
"step": 4000
},
{
"epoch": 1.9867163252638114,
"eval_loss": 0.3662048876285553,
"eval_runtime": 574.0467,
"eval_samples_per_second": 1.894,
"eval_steps_per_second": 0.948,
"eval_wer": 0.25892857142857145,
"step": 4000
},
{
"epoch": 1.9991309745499688,
"grad_norm": 15.862359046936035,
"learning_rate": 9.412422775087661e-06,
"loss": 0.5867,
"step": 4025
},
{
"epoch": 2.0119180633147113,
"grad_norm": 15.233346939086914,
"learning_rate": 9.408248455501754e-06,
"loss": 0.2928,
"step": 4050
},
{
"epoch": 2.024332712600869,
"grad_norm": 41.226078033447266,
"learning_rate": 9.404074135915846e-06,
"loss": 0.2906,
"step": 4075
},
{
"epoch": 2.0367473618870267,
"grad_norm": 16.719274520874023,
"learning_rate": 9.39989981632994e-06,
"loss": 0.3043,
"step": 4100
},
{
"epoch": 2.0491620111731845,
"grad_norm": 17.11972999572754,
"learning_rate": 9.395725496744031e-06,
"loss": 0.3007,
"step": 4125
},
{
"epoch": 2.061576660459342,
"grad_norm": 25.817195892333984,
"learning_rate": 9.391551177158124e-06,
"loss": 0.3189,
"step": 4150
},
{
"epoch": 2.0739913097455,
"grad_norm": 22.05105972290039,
"learning_rate": 9.387376857572218e-06,
"loss": 0.2891,
"step": 4175
},
{
"epoch": 2.0864059590316573,
"grad_norm": 21.231904983520508,
"learning_rate": 9.38320253798631e-06,
"loss": 0.2626,
"step": 4200
},
{
"epoch": 2.0864059590316573,
"eval_loss": 0.3803122341632843,
"eval_runtime": 575.7956,
"eval_samples_per_second": 1.888,
"eval_steps_per_second": 0.945,
"eval_wer": 0.26968344155844154,
"step": 4200
},
{
"epoch": 2.098820608317815,
"grad_norm": 21.424543380737305,
"learning_rate": 9.379028218400401e-06,
"loss": 0.2837,
"step": 4225
},
{
"epoch": 2.1112352576039726,
"grad_norm": 20.14120864868164,
"learning_rate": 9.374853898814494e-06,
"loss": 0.2861,
"step": 4250
},
{
"epoch": 2.1236499068901304,
"grad_norm": 29.401103973388672,
"learning_rate": 9.370679579228586e-06,
"loss": 0.28,
"step": 4275
},
{
"epoch": 2.136064556176288,
"grad_norm": 15.73469352722168,
"learning_rate": 9.36650525964268e-06,
"loss": 0.2564,
"step": 4300
},
{
"epoch": 2.1484792054624458,
"grad_norm": 16.33969497680664,
"learning_rate": 9.362330940056773e-06,
"loss": 0.2648,
"step": 4325
},
{
"epoch": 2.160893854748603,
"grad_norm": 13.485337257385254,
"learning_rate": 9.358156620470864e-06,
"loss": 0.2615,
"step": 4350
},
{
"epoch": 2.173308504034761,
"grad_norm": 17.95641326904297,
"learning_rate": 9.353982300884956e-06,
"loss": 0.2943,
"step": 4375
},
{
"epoch": 2.1857231533209185,
"grad_norm": 20.702796936035156,
"learning_rate": 9.349807981299049e-06,
"loss": 0.2557,
"step": 4400
},
{
"epoch": 2.1857231533209185,
"eval_loss": 0.3853345811367035,
"eval_runtime": 558.2923,
"eval_samples_per_second": 1.947,
"eval_steps_per_second": 0.974,
"eval_wer": 0.21022727272727273,
"step": 4400
},
{
"epoch": 2.1981378026070764,
"grad_norm": 19.750181198120117,
"learning_rate": 9.345633661713142e-06,
"loss": 0.2404,
"step": 4425
},
{
"epoch": 2.210552451893234,
"grad_norm": 21.653345108032227,
"learning_rate": 9.341459342127234e-06,
"loss": 0.3591,
"step": 4450
},
{
"epoch": 2.2229671011793917,
"grad_norm": 17.557680130004883,
"learning_rate": 9.337285022541327e-06,
"loss": 0.3089,
"step": 4475
},
{
"epoch": 2.2353817504655495,
"grad_norm": 6.009192943572998,
"learning_rate": 9.333110702955419e-06,
"loss": 0.3418,
"step": 4500
},
{
"epoch": 2.247796399751707,
"grad_norm": 17.482463836669922,
"learning_rate": 9.32893638336951e-06,
"loss": 0.2301,
"step": 4525
},
{
"epoch": 2.260211049037865,
"grad_norm": 13.825834274291992,
"learning_rate": 9.324762063783604e-06,
"loss": 0.3232,
"step": 4550
},
{
"epoch": 2.2726256983240223,
"grad_norm": 9.021127700805664,
"learning_rate": 9.320587744197697e-06,
"loss": 0.3317,
"step": 4575
},
{
"epoch": 2.28504034761018,
"grad_norm": 22.399105072021484,
"learning_rate": 9.316413424611789e-06,
"loss": 0.3342,
"step": 4600
},
{
"epoch": 2.28504034761018,
"eval_loss": 0.38909900188446045,
"eval_runtime": 555.7727,
"eval_samples_per_second": 1.956,
"eval_steps_per_second": 0.979,
"eval_wer": 0.20616883116883117,
"step": 4600
},
{
"epoch": 2.28504034761018,
"step": 4600,
"total_flos": 3.756642543599616e+19,
"train_loss": 1.1144439057681872,
"train_runtime": 22344.0923,
"train_samples_per_second": 21.63,
"train_steps_per_second": 2.703
}
],
"logging_steps": 25,
"max_steps": 60390,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.756642543599616e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}