{
  "best_metric": 0.3662048876285553,
  "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-swagen-combined-25hrs-model/checkpoint-4000",
  "epoch": 2.28504034761018,
  "eval_steps": 200,
  "global_step": 4600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012414649286157667,
      "grad_norm": 128.67588806152344,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 10.5484,
      "step": 25
    },
    {
      "epoch": 0.024829298572315334,
      "grad_norm": 123.8255844116211,
      "learning_rate": 9.000000000000001e-07,
      "loss": 8.1464,
      "step": 50
    },
    {
      "epoch": 0.037243947858473,
      "grad_norm": 79.1299057006836,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 5.9439,
      "step": 75
    },
    {
      "epoch": 0.04965859714463067,
      "grad_norm": 79.47651672363281,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 4.1515,
      "step": 100
    },
    {
      "epoch": 0.06207324643078833,
      "grad_norm": 68.2268295288086,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 3.4012,
      "step": 125
    },
    {
      "epoch": 0.074487895716946,
      "grad_norm": 81.23241424560547,
      "learning_rate": 2.9e-06,
      "loss": 3.3427,
      "step": 150
    },
    {
      "epoch": 0.08690254500310367,
      "grad_norm": 66.99320983886719,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 2.968,
      "step": 175
    },
    {
      "epoch": 0.09931719428926133,
      "grad_norm": 78.05485534667969,
      "learning_rate": 3.900000000000001e-06,
      "loss": 2.8233,
      "step": 200
    },
    {
      "epoch": 0.09931719428926133,
      "eval_loss": 0.804746150970459,
      "eval_runtime": 563.0664,
      "eval_samples_per_second": 1.931,
      "eval_steps_per_second": 0.966,
      "eval_wer": 0.489650974025974,
      "step": 200
    },
    {
      "epoch": 0.11173184357541899,
      "grad_norm": 46.1437873840332,
      "learning_rate": 4.4e-06,
      "loss": 2.6012,
      "step": 225
    },
    {
      "epoch": 0.12414649286157665,
      "grad_norm": 99.51728057861328,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 2.2989,
      "step": 250
    },
    {
      "epoch": 0.13656114214773432,
      "grad_norm": 49.41315460205078,
      "learning_rate": 5.400000000000001e-06,
      "loss": 2.2207,
      "step": 275
    },
    {
      "epoch": 0.148975791433892,
      "grad_norm": 53.38062286376953,
      "learning_rate": 5.9e-06,
      "loss": 2.202,
      "step": 300
    },
    {
      "epoch": 0.16139044072004965,
      "grad_norm": 49.83573913574219,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 2.2695,
      "step": 325
    },
    {
      "epoch": 0.17380509000620734,
      "grad_norm": 75.33547973632812,
      "learning_rate": 6.9e-06,
      "loss": 1.9705,
      "step": 350
    },
    {
      "epoch": 0.186219739292365,
      "grad_norm": 55.35056686401367,
      "learning_rate": 7.4e-06,
      "loss": 2.0473,
      "step": 375
    },
    {
      "epoch": 0.19863438857852267,
      "grad_norm": 46.99931335449219,
      "learning_rate": 7.9e-06,
      "loss": 1.9329,
      "step": 400
    },
    {
      "epoch": 0.19863438857852267,
      "eval_loss": 0.6190668940544128,
      "eval_runtime": 574.5315,
      "eval_samples_per_second": 1.892,
      "eval_steps_per_second": 0.947,
      "eval_wer": 0.401075487012987,
      "step": 400
    },
    {
      "epoch": 0.21104903786468032,
      "grad_norm": 57.05539321899414,
      "learning_rate": 8.400000000000001e-06,
      "loss": 1.8312,
      "step": 425
    },
    {
      "epoch": 0.22346368715083798,
      "grad_norm": 53.04418182373047,
      "learning_rate": 8.900000000000001e-06,
      "loss": 1.8474,
      "step": 450
    },
    {
      "epoch": 0.23587833643699566,
      "grad_norm": 53.90583801269531,
      "learning_rate": 9.4e-06,
      "loss": 1.9193,
      "step": 475
    },
    {
      "epoch": 0.2482929857231533,
      "grad_norm": 51.17042922973633,
      "learning_rate": 9.9e-06,
      "loss": 1.725,
      "step": 500
    },
    {
      "epoch": 0.260707635009311,
      "grad_norm": 42.38318634033203,
      "learning_rate": 9.996660544331274e-06,
      "loss": 1.8561,
      "step": 525
    },
    {
      "epoch": 0.27312228429546864,
      "grad_norm": 51.599029541015625,
      "learning_rate": 9.992486224745367e-06,
      "loss": 1.9738,
      "step": 550
    },
    {
      "epoch": 0.2855369335816263,
      "grad_norm": 59.115108489990234,
      "learning_rate": 9.98831190515946e-06,
      "loss": 1.6793,
      "step": 575
    },
    {
      "epoch": 0.297951582867784,
      "grad_norm": 42.64860534667969,
      "learning_rate": 9.984137585573552e-06,
      "loss": 1.6927,
      "step": 600
    },
    {
      "epoch": 0.297951582867784,
      "eval_loss": 0.5420816540718079,
      "eval_runtime": 589.5719,
      "eval_samples_per_second": 1.844,
      "eval_steps_per_second": 0.923,
      "eval_wer": 0.37905844155844154,
      "step": 600
    },
    {
      "epoch": 0.31036623215394166,
      "grad_norm": 44.46907043457031,
      "learning_rate": 9.979963265987644e-06,
      "loss": 1.6992,
      "step": 625
    },
    {
      "epoch": 0.3227808814400993,
      "grad_norm": 38.840396881103516,
      "learning_rate": 9.975788946401737e-06,
      "loss": 1.5042,
      "step": 650
    },
    {
      "epoch": 0.33519553072625696,
      "grad_norm": 46.88064956665039,
      "learning_rate": 9.97161462681583e-06,
      "loss": 1.7753,
      "step": 675
    },
    {
      "epoch": 0.34761018001241467,
      "grad_norm": 44.91327667236328,
      "learning_rate": 9.967440307229922e-06,
      "loss": 1.7618,
      "step": 700
    },
    {
      "epoch": 0.3600248292985723,
      "grad_norm": 42.24628448486328,
      "learning_rate": 9.963265987644016e-06,
      "loss": 1.6682,
      "step": 725
    },
    {
      "epoch": 0.37243947858473,
      "grad_norm": 45.74182891845703,
      "learning_rate": 9.959091668058107e-06,
      "loss": 1.5824,
      "step": 750
    },
    {
      "epoch": 0.38485412787088763,
      "grad_norm": 25.388633728027344,
      "learning_rate": 9.954917348472199e-06,
      "loss": 1.6692,
      "step": 775
    },
    {
      "epoch": 0.39726877715704534,
      "grad_norm": 33.251548767089844,
      "learning_rate": 9.950743028886292e-06,
      "loss": 1.6183,
      "step": 800
    },
    {
      "epoch": 0.39726877715704534,
      "eval_loss": 0.48888257145881653,
      "eval_runtime": 577.1733,
      "eval_samples_per_second": 1.883,
      "eval_steps_per_second": 0.943,
      "eval_wer": 0.3210227272727273,
      "step": 800
    },
    {
      "epoch": 0.409683426443203,
      "grad_norm": 30.38732147216797,
      "learning_rate": 9.946568709300385e-06,
      "loss": 1.4884,
      "step": 825
    },
    {
      "epoch": 0.42209807572936064,
      "grad_norm": 48.94175338745117,
      "learning_rate": 9.942394389714477e-06,
      "loss": 1.4615,
      "step": 850
    },
    {
      "epoch": 0.4345127250155183,
      "grad_norm": 29.04236602783203,
      "learning_rate": 9.93822007012857e-06,
      "loss": 1.5201,
      "step": 875
    },
    {
      "epoch": 0.44692737430167595,
      "grad_norm": 41.91320037841797,
      "learning_rate": 9.934045750542662e-06,
      "loss": 1.5147,
      "step": 900
    },
    {
      "epoch": 0.45934202358783366,
      "grad_norm": 40.610572814941406,
      "learning_rate": 9.929871430956755e-06,
      "loss": 1.4561,
      "step": 925
    },
    {
      "epoch": 0.4717566728739913,
      "grad_norm": 33.01325988769531,
      "learning_rate": 9.925697111370847e-06,
      "loss": 1.3772,
      "step": 950
    },
    {
      "epoch": 0.48417132216014896,
      "grad_norm": 40.93734359741211,
      "learning_rate": 9.92152279178494e-06,
      "loss": 1.549,
      "step": 975
    },
    {
      "epoch": 0.4965859714463066,
      "grad_norm": 41.81599044799805,
      "learning_rate": 9.917348472199032e-06,
      "loss": 1.4431,
      "step": 1000
    },
    {
      "epoch": 0.4965859714463066,
      "eval_loss": 0.4683995544910431,
      "eval_runtime": 563.8925,
      "eval_samples_per_second": 1.928,
      "eval_steps_per_second": 0.965,
      "eval_wer": 0.28662743506493504,
      "step": 1000
    },
    {
      "epoch": 0.5090006207324643,
      "grad_norm": 23.732839584350586,
      "learning_rate": 9.913174152613125e-06,
      "loss": 1.2911,
      "step": 1025
    },
    {
      "epoch": 0.521415270018622,
      "grad_norm": 35.39672088623047,
      "learning_rate": 9.908999833027217e-06,
      "loss": 1.2753,
      "step": 1050
    },
    {
      "epoch": 0.5338299193047796,
      "grad_norm": 20.741168975830078,
      "learning_rate": 9.90482551344131e-06,
      "loss": 1.4464,
      "step": 1075
    },
    {
      "epoch": 0.5462445685909373,
      "grad_norm": 44.05943298339844,
      "learning_rate": 9.900651193855404e-06,
      "loss": 1.2189,
      "step": 1100
    },
    {
      "epoch": 0.5586592178770949,
      "grad_norm": 30.3934268951416,
      "learning_rate": 9.896476874269495e-06,
      "loss": 1.357,
      "step": 1125
    },
    {
      "epoch": 0.5710738671632526,
      "grad_norm": 39.36647415161133,
      "learning_rate": 9.892302554683587e-06,
      "loss": 1.3864,
      "step": 1150
    },
    {
      "epoch": 0.5834885164494104,
      "grad_norm": 39.50497055053711,
      "learning_rate": 9.88812823509768e-06,
      "loss": 1.5879,
      "step": 1175
    },
    {
      "epoch": 0.595903165735568,
      "grad_norm": 52.04657745361328,
      "learning_rate": 9.883953915511772e-06,
      "loss": 1.4117,
      "step": 1200
    },
    {
      "epoch": 0.595903165735568,
      "eval_loss": 0.42576098442077637,
      "eval_runtime": 574.3755,
      "eval_samples_per_second": 1.892,
      "eval_steps_per_second": 0.947,
      "eval_wer": 0.2650162337662338,
      "step": 1200
    },
    {
      "epoch": 0.6083178150217257,
      "grad_norm": 41.47892761230469,
      "learning_rate": 9.879779595925865e-06,
      "loss": 1.2806,
      "step": 1225
    },
    {
      "epoch": 0.6207324643078833,
      "grad_norm": 35.136695861816406,
      "learning_rate": 9.875605276339958e-06,
      "loss": 1.2739,
      "step": 1250
    },
    {
      "epoch": 0.633147113594041,
      "grad_norm": 29.484039306640625,
      "learning_rate": 9.87143095675405e-06,
      "loss": 1.1364,
      "step": 1275
    },
    {
      "epoch": 0.6455617628801986,
      "grad_norm": 47.20607376098633,
      "learning_rate": 9.867256637168142e-06,
      "loss": 1.1565,
      "step": 1300
    },
    {
      "epoch": 0.6579764121663563,
      "grad_norm": 44.51639938354492,
      "learning_rate": 9.863082317582235e-06,
      "loss": 1.2704,
      "step": 1325
    },
    {
      "epoch": 0.6703910614525139,
      "grad_norm": 56.79221725463867,
      "learning_rate": 9.858907997996328e-06,
      "loss": 1.3655,
      "step": 1350
    },
    {
      "epoch": 0.6828057107386716,
      "grad_norm": 31.851566314697266,
      "learning_rate": 9.85473367841042e-06,
      "loss": 1.2962,
      "step": 1375
    },
    {
      "epoch": 0.6952203600248293,
      "grad_norm": 48.65141677856445,
      "learning_rate": 9.850559358824512e-06,
      "loss": 1.2699,
      "step": 1400
    },
    {
      "epoch": 0.6952203600248293,
      "eval_loss": 0.4222487807273865,
      "eval_runtime": 572.8012,
      "eval_samples_per_second": 1.898,
      "eval_steps_per_second": 0.95,
      "eval_wer": 0.26653814935064934,
      "step": 1400
    },
    {
      "epoch": 0.707635009310987,
      "grad_norm": 32.21327209472656,
      "learning_rate": 9.846385039238605e-06,
      "loss": 1.1561,
      "step": 1425
    },
    {
      "epoch": 0.7200496585971446,
      "grad_norm": 38.31489181518555,
      "learning_rate": 9.842210719652696e-06,
      "loss": 1.3146,
      "step": 1450
    },
    {
      "epoch": 0.7324643078833023,
      "grad_norm": 56.665260314941406,
      "learning_rate": 9.83803640006679e-06,
      "loss": 1.3184,
      "step": 1475
    },
    {
      "epoch": 0.74487895716946,
      "grad_norm": 49.64814758300781,
      "learning_rate": 9.833862080480883e-06,
      "loss": 1.0521,
      "step": 1500
    },
    {
      "epoch": 0.7572936064556176,
      "grad_norm": 32.33070373535156,
      "learning_rate": 9.829687760894975e-06,
      "loss": 1.2677,
      "step": 1525
    },
    {
      "epoch": 0.7697082557417753,
      "grad_norm": 27.896947860717773,
      "learning_rate": 9.825513441309066e-06,
      "loss": 1.3059,
      "step": 1550
    },
    {
      "epoch": 0.7821229050279329,
      "grad_norm": 28.060487747192383,
      "learning_rate": 9.82133912172316e-06,
      "loss": 1.3901,
      "step": 1575
    },
    {
      "epoch": 0.7945375543140907,
      "grad_norm": 32.01655578613281,
      "learning_rate": 9.817164802137253e-06,
      "loss": 1.0532,
      "step": 1600
    },
    {
      "epoch": 0.7945375543140907,
      "eval_loss": 0.41084742546081543,
      "eval_runtime": 564.5825,
      "eval_samples_per_second": 1.925,
      "eval_steps_per_second": 0.964,
      "eval_wer": 0.2513189935064935,
      "step": 1600
    },
    {
      "epoch": 0.8069522036002483,
      "grad_norm": 37.56877136230469,
      "learning_rate": 9.812990482551345e-06,
      "loss": 1.2314,
      "step": 1625
    },
    {
      "epoch": 0.819366852886406,
      "grad_norm": 23.31650161743164,
      "learning_rate": 9.808816162965438e-06,
      "loss": 1.214,
      "step": 1650
    },
    {
      "epoch": 0.8317815021725636,
      "grad_norm": 52.62869644165039,
      "learning_rate": 9.80464184337953e-06,
      "loss": 1.1148,
      "step": 1675
    },
    {
      "epoch": 0.8441961514587213,
      "grad_norm": 37.902523040771484,
      "learning_rate": 9.800467523793621e-06,
      "loss": 1.1947,
      "step": 1700
    },
    {
      "epoch": 0.8566108007448789,
      "grad_norm": 46.63554382324219,
      "learning_rate": 9.796293204207715e-06,
      "loss": 1.1841,
      "step": 1725
    },
    {
      "epoch": 0.8690254500310366,
      "grad_norm": 24.407249450683594,
      "learning_rate": 9.792118884621808e-06,
      "loss": 1.0706,
      "step": 1750
    },
    {
      "epoch": 0.8814400993171942,
      "grad_norm": 33.92270278930664,
      "learning_rate": 9.7879445650359e-06,
      "loss": 1.0771,
      "step": 1775
    },
    {
      "epoch": 0.8938547486033519,
      "grad_norm": 36.15495681762695,
      "learning_rate": 9.783770245449993e-06,
      "loss": 1.0589,
      "step": 1800
    },
    {
      "epoch": 0.8938547486033519,
      "eval_loss": 0.39820805191993713,
      "eval_runtime": 559.8426,
      "eval_samples_per_second": 1.942,
      "eval_steps_per_second": 0.972,
      "eval_wer": 0.22909902597402598,
      "step": 1800
    },
    {
      "epoch": 0.9062693978895097,
      "grad_norm": 39.94309616088867,
      "learning_rate": 9.779595925864084e-06,
      "loss": 1.219,
      "step": 1825
    },
    {
      "epoch": 0.9186840471756673,
      "grad_norm": 29.685474395751953,
      "learning_rate": 9.775421606278178e-06,
      "loss": 1.2091,
      "step": 1850
    },
    {
      "epoch": 0.931098696461825,
      "grad_norm": 39.77056121826172,
      "learning_rate": 9.771247286692271e-06,
      "loss": 1.2096,
      "step": 1875
    },
    {
      "epoch": 0.9435133457479826,
      "grad_norm": 22.495344161987305,
      "learning_rate": 9.767072967106363e-06,
      "loss": 1.1108,
      "step": 1900
    },
    {
      "epoch": 0.9559279950341403,
      "grad_norm": 42.11180114746094,
      "learning_rate": 9.762898647520454e-06,
      "loss": 1.0949,
      "step": 1925
    },
    {
      "epoch": 0.9683426443202979,
      "grad_norm": 41.73212432861328,
      "learning_rate": 9.758724327934548e-06,
      "loss": 1.2428,
      "step": 1950
    },
    {
      "epoch": 0.9807572936064556,
      "grad_norm": 39.16131591796875,
      "learning_rate": 9.75455000834864e-06,
      "loss": 0.9964,
      "step": 1975
    },
    {
      "epoch": 0.9931719428926132,
      "grad_norm": 27.52761459350586,
      "learning_rate": 9.750375688762733e-06,
      "loss": 1.1856,
      "step": 2000
    },
    {
      "epoch": 0.9931719428926132,
      "eval_loss": 0.3853071331977844,
      "eval_runtime": 565.6151,
      "eval_samples_per_second": 1.922,
      "eval_steps_per_second": 0.962,
      "eval_wer": 0.23549107142857142,
      "step": 2000
    },
    {
      "epoch": 1.0059590316573557,
      "grad_norm": 19.790531158447266,
      "learning_rate": 9.746201369176826e-06,
      "loss": 0.9702,
      "step": 2025
    },
    {
      "epoch": 1.0183736809435133,
      "grad_norm": 24.30504035949707,
      "learning_rate": 9.742027049590918e-06,
      "loss": 0.6177,
      "step": 2050
    },
    {
      "epoch": 1.030788330229671,
      "grad_norm": 25.81077003479004,
      "learning_rate": 9.73785273000501e-06,
      "loss": 0.5878,
      "step": 2075
    },
    {
      "epoch": 1.0432029795158286,
      "grad_norm": 29.500877380371094,
      "learning_rate": 9.733678410419102e-06,
      "loss": 0.6152,
      "step": 2100
    },
    {
      "epoch": 1.0556176288019863,
      "grad_norm": 18.39103889465332,
      "learning_rate": 9.729504090833196e-06,
      "loss": 0.5966,
      "step": 2125
    },
    {
      "epoch": 1.068032278088144,
      "grad_norm": 42.394142150878906,
      "learning_rate": 9.725329771247287e-06,
      "loss": 0.6365,
      "step": 2150
    },
    {
      "epoch": 1.0804469273743016,
      "grad_norm": 19.30755043029785,
      "learning_rate": 9.72115545166138e-06,
      "loss": 0.6584,
      "step": 2175
    },
    {
      "epoch": 1.0928615766604592,
      "grad_norm": 22.643875122070312,
      "learning_rate": 9.716981132075472e-06,
      "loss": 0.6692,
      "step": 2200
    },
    {
      "epoch": 1.0928615766604592,
      "eval_loss": 0.40007734298706055,
      "eval_runtime": 581.1524,
      "eval_samples_per_second": 1.87,
      "eval_steps_per_second": 0.936,
      "eval_wer": 0.2650162337662338,
      "step": 2200
    },
    {
      "epoch": 1.105276225946617,
      "grad_norm": 28.355436325073242,
      "learning_rate": 9.712806812489564e-06,
      "loss": 0.6398,
      "step": 2225
    },
    {
      "epoch": 1.1176908752327748,
      "grad_norm": 29.392656326293945,
      "learning_rate": 9.708632492903657e-06,
      "loss": 0.64,
      "step": 2250
    },
    {
      "epoch": 1.1301055245189324,
      "grad_norm": 25.52250099182129,
      "learning_rate": 9.70445817331775e-06,
      "loss": 0.6339,
      "step": 2275
    },
    {
      "epoch": 1.14252017380509,
      "grad_norm": 26.52411460876465,
      "learning_rate": 9.700283853731842e-06,
      "loss": 0.5372,
      "step": 2300
    },
    {
      "epoch": 1.1549348230912477,
      "grad_norm": 26.201452255249023,
      "learning_rate": 9.696109534145936e-06,
      "loss": 0.5878,
      "step": 2325
    },
    {
      "epoch": 1.1673494723774054,
      "grad_norm": 23.98987579345703,
      "learning_rate": 9.691935214560027e-06,
      "loss": 0.5349,
      "step": 2350
    },
    {
      "epoch": 1.179764121663563,
      "grad_norm": 32.815521240234375,
      "learning_rate": 9.68776089497412e-06,
      "loss": 0.7508,
      "step": 2375
    },
    {
      "epoch": 1.1921787709497207,
      "grad_norm": 23.12726593017578,
      "learning_rate": 9.683586575388212e-06,
      "loss": 0.6505,
      "step": 2400
    },
    {
      "epoch": 1.1921787709497207,
      "eval_loss": 0.39191773533821106,
      "eval_runtime": 571.3726,
      "eval_samples_per_second": 1.902,
      "eval_steps_per_second": 0.952,
      "eval_wer": 0.23894074675324675,
      "step": 2400
    },
    {
      "epoch": 1.2045934202358783,
      "grad_norm": 19.867704391479492,
      "learning_rate": 9.679412255802305e-06,
      "loss": 0.5807,
      "step": 2425
    },
    {
      "epoch": 1.217008069522036,
      "grad_norm": 19.685293197631836,
      "learning_rate": 9.675237936216397e-06,
      "loss": 0.7044,
      "step": 2450
    },
    {
      "epoch": 1.2294227188081936,
      "grad_norm": 28.70237159729004,
      "learning_rate": 9.67106361663049e-06,
      "loss": 0.6598,
      "step": 2475
    },
    {
      "epoch": 1.2418373680943513,
      "grad_norm": 36.98805618286133,
      "learning_rate": 9.666889297044582e-06,
      "loss": 0.6079,
      "step": 2500
    },
    {
      "epoch": 1.254252017380509,
      "grad_norm": 22.906494140625,
      "learning_rate": 9.662714977458675e-06,
      "loss": 0.7132,
      "step": 2525
    },
    {
      "epoch": 1.2666666666666666,
      "grad_norm": 21.013233184814453,
      "learning_rate": 9.658540657872769e-06,
      "loss": 0.6346,
      "step": 2550
    },
    {
      "epoch": 1.2790813159528243,
      "grad_norm": 22.889606475830078,
      "learning_rate": 9.65436633828686e-06,
      "loss": 0.5689,
      "step": 2575
    },
    {
      "epoch": 1.291495965238982,
      "grad_norm": 21.3165225982666,
      "learning_rate": 9.650192018700952e-06,
      "loss": 0.6613,
      "step": 2600
    },
    {
      "epoch": 1.291495965238982,
      "eval_loss": 0.3809148669242859,
      "eval_runtime": 575.5999,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 0.945,
      "eval_wer": 0.2385349025974026,
      "step": 2600
    },
    {
      "epoch": 1.3039106145251398,
      "grad_norm": 31.75080108642578,
      "learning_rate": 9.646017699115045e-06,
      "loss": 0.6436,
      "step": 2625
    },
    {
      "epoch": 1.3163252638112972,
      "grad_norm": 30.9864559173584,
      "learning_rate": 9.641843379529137e-06,
      "loss": 0.696,
      "step": 2650
    },
    {
      "epoch": 1.328739913097455,
      "grad_norm": 30.82682991027832,
      "learning_rate": 9.63766905994323e-06,
      "loss": 0.5955,
      "step": 2675
    },
    {
      "epoch": 1.3411545623836125,
      "grad_norm": 34.10749435424805,
      "learning_rate": 9.633494740357322e-06,
      "loss": 0.7117,
      "step": 2700
    },
    {
      "epoch": 1.3535692116697704,
      "grad_norm": 30.104955673217773,
      "learning_rate": 9.629320420771415e-06,
      "loss": 0.5666,
      "step": 2725
    },
    {
      "epoch": 1.365983860955928,
      "grad_norm": 23.225740432739258,
      "learning_rate": 9.625146101185507e-06,
      "loss": 0.5734,
      "step": 2750
    },
    {
      "epoch": 1.3783985102420857,
      "grad_norm": 20.32614517211914,
      "learning_rate": 9.6209717815996e-06,
      "loss": 0.6535,
      "step": 2775
    },
    {
      "epoch": 1.3908131595282434,
      "grad_norm": 23.999792098999023,
      "learning_rate": 9.616797462013693e-06,
      "loss": 0.6194,
      "step": 2800
    },
    {
      "epoch": 1.3908131595282434,
      "eval_loss": 0.3873368799686432,
      "eval_runtime": 568.9878,
      "eval_samples_per_second": 1.91,
      "eval_steps_per_second": 0.956,
      "eval_wer": 0.23427353896103897,
      "step": 2800
    },
    {
      "epoch": 1.403227808814401,
      "grad_norm": 18.715627670288086,
      "learning_rate": 9.612623142427785e-06,
      "loss": 0.5924,
      "step": 2825
    },
    {
      "epoch": 1.4156424581005587,
      "grad_norm": 24.6026611328125,
      "learning_rate": 9.608448822841877e-06,
      "loss": 0.5588,
      "step": 2850
    },
    {
      "epoch": 1.4280571073867163,
      "grad_norm": 32.74100875854492,
      "learning_rate": 9.60427450325597e-06,
      "loss": 0.6261,
      "step": 2875
    },
    {
      "epoch": 1.440471756672874,
      "grad_norm": 31.3200740814209,
      "learning_rate": 9.600100183670062e-06,
      "loss": 0.756,
      "step": 2900
    },
    {
      "epoch": 1.4528864059590316,
      "grad_norm": 19.404541015625,
      "learning_rate": 9.595925864084155e-06,
      "loss": 0.6082,
      "step": 2925
    },
    {
      "epoch": 1.4653010552451893,
      "grad_norm": 16.61175537109375,
      "learning_rate": 9.591751544498248e-06,
      "loss": 0.6567,
      "step": 2950
    },
    {
      "epoch": 1.477715704531347,
      "grad_norm": 22.71599006652832,
      "learning_rate": 9.587744197695776e-06,
      "loss": 0.6098,
      "step": 2975
    },
    {
      "epoch": 1.4901303538175046,
      "grad_norm": 32.15653610229492,
      "learning_rate": 9.583569878109869e-06,
      "loss": 0.6358,
      "step": 3000
    },
    {
      "epoch": 1.4901303538175046,
      "eval_loss": 0.38495373725891113,
      "eval_runtime": 561.3182,
      "eval_samples_per_second": 1.937,
      "eval_steps_per_second": 0.969,
      "eval_wer": 0.21418425324675325,
      "step": 3000
    },
    {
      "epoch": 1.5025450031036622,
      "grad_norm": 22.268293380737305,
      "learning_rate": 9.579395558523962e-06,
      "loss": 0.5949,
      "step": 3025
    },
    {
      "epoch": 1.51495965238982,
      "grad_norm": 28.58846092224121,
      "learning_rate": 9.575221238938054e-06,
      "loss": 0.6006,
      "step": 3050
    },
    {
      "epoch": 1.5273743016759775,
      "grad_norm": 25.382551193237305,
      "learning_rate": 9.571046919352145e-06,
      "loss": 0.5811,
      "step": 3075
    },
    {
      "epoch": 1.5397889509621354,
      "grad_norm": 34.780006408691406,
      "learning_rate": 9.566872599766239e-06,
      "loss": 0.5968,
      "step": 3100
    },
    {
      "epoch": 1.5522036002482928,
      "grad_norm": 21.326889038085938,
      "learning_rate": 9.562698280180332e-06,
      "loss": 0.4749,
      "step": 3125
    },
    {
      "epoch": 1.5646182495344507,
      "grad_norm": 27.90545654296875,
      "learning_rate": 9.558523960594424e-06,
      "loss": 0.6064,
      "step": 3150
    },
    {
      "epoch": 1.5770328988206082,
      "grad_norm": 22.328035354614258,
      "learning_rate": 9.554349641008517e-06,
      "loss": 0.5755,
      "step": 3175
    },
    {
      "epoch": 1.589447548106766,
      "grad_norm": 23.400901794433594,
      "learning_rate": 9.550175321422609e-06,
      "loss": 0.6208,
      "step": 3200
    },
    {
      "epoch": 1.589447548106766,
      "eval_loss": 0.37794527411460876,
      "eval_runtime": 565.9599,
      "eval_samples_per_second": 1.921,
      "eval_steps_per_second": 0.961,
      "eval_wer": 0.23883928571428573,
      "step": 3200
    },
    {
      "epoch": 1.6018621973929237,
      "grad_norm": 21.570287704467773,
      "learning_rate": 9.5460010018367e-06,
      "loss": 0.5788,
      "step": 3225
    },
    {
      "epoch": 1.6142768466790813,
      "grad_norm": 27.813451766967773,
      "learning_rate": 9.541826682250794e-06,
      "loss": 0.6945,
      "step": 3250
    },
    {
      "epoch": 1.626691495965239,
      "grad_norm": 30.955820083618164,
      "learning_rate": 9.537652362664887e-06,
      "loss": 0.5379,
      "step": 3275
    },
    {
      "epoch": 1.6391061452513966,
      "grad_norm": 20.53118133544922,
      "learning_rate": 9.533478043078979e-06,
      "loss": 0.6171,
      "step": 3300
    },
    {
      "epoch": 1.6515207945375543,
      "grad_norm": 23.763132095336914,
      "learning_rate": 9.529303723493072e-06,
      "loss": 0.6021,
      "step": 3325
    },
    {
      "epoch": 1.663935443823712,
      "grad_norm": 26.67987632751465,
      "learning_rate": 9.525129403907164e-06,
      "loss": 0.6727,
      "step": 3350
    },
    {
      "epoch": 1.6763500931098696,
      "grad_norm": 25.991594314575195,
      "learning_rate": 9.520955084321257e-06,
      "loss": 0.7155,
      "step": 3375
    },
    {
      "epoch": 1.6887647423960273,
      "grad_norm": 19.079315185546875,
      "learning_rate": 9.51678076473535e-06,
      "loss": 0.5932,
      "step": 3400
    },
    {
      "epoch": 1.6887647423960273,
      "eval_loss": 0.3724534511566162,
      "eval_runtime": 550.7846,
      "eval_samples_per_second": 1.974,
      "eval_steps_per_second": 0.988,
      "eval_wer": 0.20403814935064934,
      "step": 3400
    },
    {
      "epoch": 1.7011793916821851,
      "grad_norm": 18.52420997619629,
      "learning_rate": 9.512606445149442e-06,
      "loss": 0.6704,
      "step": 3425
    },
    {
      "epoch": 1.7135940409683426,
      "grad_norm": 19.514951705932617,
      "learning_rate": 9.508432125563533e-06,
      "loss": 0.5896,
      "step": 3450
    },
    {
      "epoch": 1.7260086902545004,
      "grad_norm": 28.89137840270996,
      "learning_rate": 9.504257805977627e-06,
      "loss": 0.5097,
      "step": 3475
    },
    {
      "epoch": 1.7384233395406579,
      "grad_norm": 32.02205276489258,
      "learning_rate": 9.500083486391718e-06,
      "loss": 0.6217,
      "step": 3500
    },
    {
      "epoch": 1.7508379888268157,
      "grad_norm": 36.85642623901367,
      "learning_rate": 9.495909166805812e-06,
      "loss": 0.666,
      "step": 3525
    },
    {
      "epoch": 1.7632526381129732,
      "grad_norm": 37.10481262207031,
      "learning_rate": 9.491734847219905e-06,
      "loss": 0.5903,
      "step": 3550
    },
    {
      "epoch": 1.775667287399131,
      "grad_norm": 19.526355743408203,
      "learning_rate": 9.487560527633997e-06,
      "loss": 0.5304,
      "step": 3575
    },
    {
      "epoch": 1.7880819366852885,
      "grad_norm": 30.528167724609375,
      "learning_rate": 9.483386208048088e-06,
      "loss": 0.5797,
      "step": 3600
    },
    {
      "epoch": 1.7880819366852885,
      "eval_loss": 0.3712182641029358,
      "eval_runtime": 558.4122,
      "eval_samples_per_second": 1.947,
      "eval_steps_per_second": 0.974,
      "eval_wer": 0.20921266233766234,
      "step": 3600
    },
    {
      "epoch": 1.8004965859714464,
      "grad_norm": 29.263221740722656,
      "learning_rate": 9.479211888462182e-06,
      "loss": 0.6156,
      "step": 3625
    },
    {
      "epoch": 1.812911235257604,
      "grad_norm": 23.728296279907227,
      "learning_rate": 9.475037568876275e-06,
      "loss": 0.6568,
      "step": 3650
    },
    {
      "epoch": 1.8253258845437617,
      "grad_norm": 15.723759651184082,
      "learning_rate": 9.470863249290367e-06,
      "loss": 0.566,
      "step": 3675
    },
    {
      "epoch": 1.8377405338299193,
      "grad_norm": 39.088584899902344,
      "learning_rate": 9.466688929704458e-06,
      "loss": 0.7007,
      "step": 3700
    },
    {
      "epoch": 1.850155183116077,
      "grad_norm": 20.931364059448242,
      "learning_rate": 9.462514610118551e-06,
      "loss": 0.6843,
      "step": 3725
    },
    {
      "epoch": 1.8625698324022346,
      "grad_norm": 23.179536819458008,
      "learning_rate": 9.458340290532643e-06,
      "loss": 0.6391,
      "step": 3750
    },
    {
      "epoch": 1.8749844816883923,
      "grad_norm": 31.087736129760742,
      "learning_rate": 9.454165970946736e-06,
      "loss": 0.6611,
      "step": 3775
    },
    {
      "epoch": 1.88739913097455,
      "grad_norm": 22.13474464416504,
      "learning_rate": 9.44999165136083e-06,
      "loss": 0.5707,
      "step": 3800
    },
    {
      "epoch": 1.88739913097455,
      "eval_loss": 0.37375178933143616,
      "eval_runtime": 565.1592,
      "eval_samples_per_second": 1.923,
      "eval_steps_per_second": 0.963,
      "eval_wer": 0.23417207792207792,
      "step": 3800
    },
    {
      "epoch": 1.8998137802607076,
      "grad_norm": 22.615114212036133,
      "learning_rate": 9.445817331774921e-06,
      "loss": 0.5573,
      "step": 3825
    },
    {
      "epoch": 1.9122284295468654,
      "grad_norm": 32.943199157714844,
      "learning_rate": 9.441643012189013e-06,
      "loss": 0.6528,
      "step": 3850
    },
    {
      "epoch": 1.9246430788330229,
      "grad_norm": 29.096609115600586,
      "learning_rate": 9.437468692603106e-06,
      "loss": 0.7014,
      "step": 3875
    },
    {
      "epoch": 1.9370577281191808,
      "grad_norm": 18.50649642944336,
      "learning_rate": 9.4332943730172e-06,
      "loss": 0.5836,
      "step": 3900
    },
    {
      "epoch": 1.9494723774053382,
      "grad_norm": 27.316129684448242,
      "learning_rate": 9.429120053431291e-06,
      "loss": 0.5993,
      "step": 3925
    },
    {
      "epoch": 1.961887026691496,
      "grad_norm": 26.35407257080078,
      "learning_rate": 9.424945733845385e-06,
      "loss": 0.5874,
      "step": 3950
    },
    {
      "epoch": 1.9743016759776535,
      "grad_norm": 23.183897018432617,
      "learning_rate": 9.420771414259476e-06,
      "loss": 0.6319,
      "step": 3975
    },
    {
      "epoch": 1.9867163252638114,
      "grad_norm": 25.644729614257812,
      "learning_rate": 9.416597094673568e-06,
      "loss": 0.5928,
      "step": 4000
    },
    {
      "epoch": 1.9867163252638114,
      "eval_loss": 0.3662048876285553,
      "eval_runtime": 574.0467,
      "eval_samples_per_second": 1.894,
      "eval_steps_per_second": 0.948,
      "eval_wer": 0.25892857142857145,
      "step": 4000
    },
    {
      "epoch": 1.9991309745499688,
      "grad_norm": 15.862359046936035,
      "learning_rate": 9.412422775087661e-06,
      "loss": 0.5867,
      "step": 4025
    },
    {
      "epoch": 2.0119180633147113,
      "grad_norm": 15.233346939086914,
      "learning_rate": 9.408248455501754e-06,
      "loss": 0.2928,
      "step": 4050
    },
    {
      "epoch": 2.024332712600869,
      "grad_norm": 41.226078033447266,
      "learning_rate": 9.404074135915846e-06,
      "loss": 0.2906,
      "step": 4075
    },
    {
      "epoch": 2.0367473618870267,
      "grad_norm": 16.719274520874023,
      "learning_rate": 9.39989981632994e-06,
      "loss": 0.3043,
      "step": 4100
    },
    {
      "epoch": 2.0491620111731845,
      "grad_norm": 17.11972999572754,
      "learning_rate": 9.395725496744031e-06,
      "loss": 0.3007,
      "step": 4125
    },
    {
      "epoch": 2.061576660459342,
      "grad_norm": 25.817195892333984,
      "learning_rate": 9.391551177158124e-06,
      "loss": 0.3189,
      "step": 4150
    },
    {
      "epoch": 2.0739913097455,
      "grad_norm": 22.05105972290039,
      "learning_rate": 9.387376857572218e-06,
      "loss": 0.2891,
      "step": 4175
    },
    {
      "epoch": 2.0864059590316573,
      "grad_norm": 21.231904983520508,
      "learning_rate": 9.38320253798631e-06,
      "loss": 0.2626,
      "step": 4200
    },
    {
      "epoch": 2.0864059590316573,
      "eval_loss": 0.3803122341632843,
      "eval_runtime": 575.7956,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 0.945,
      "eval_wer": 0.26968344155844154,
      "step": 4200
    },
    {
      "epoch": 2.098820608317815,
      "grad_norm": 21.424543380737305,
      "learning_rate": 9.379028218400401e-06,
      "loss": 0.2837,
      "step": 4225
    },
    {
      "epoch": 2.1112352576039726,
      "grad_norm": 20.14120864868164,
      "learning_rate": 9.374853898814494e-06,
      "loss": 0.2861,
      "step": 4250
    },
    {
      "epoch": 2.1236499068901304,
      "grad_norm": 29.401103973388672,
      "learning_rate": 9.370679579228586e-06,
      "loss": 0.28,
      "step": 4275
    },
    {
      "epoch": 2.136064556176288,
      "grad_norm": 15.73469352722168,
      "learning_rate": 9.36650525964268e-06,
      "loss": 0.2564,
      "step": 4300
    },
    {
      "epoch": 2.1484792054624458,
      "grad_norm": 16.33969497680664,
      "learning_rate": 9.362330940056773e-06,
      "loss": 0.2648,
      "step": 4325
    },
    {
      "epoch": 2.160893854748603,
      "grad_norm": 13.485337257385254,
      "learning_rate": 9.358156620470864e-06,
      "loss": 0.2615,
      "step": 4350
    },
    {
      "epoch": 2.173308504034761,
      "grad_norm": 17.95641326904297,
      "learning_rate": 9.353982300884956e-06,
      "loss": 0.2943,
      "step": 4375
    },
    {
      "epoch": 2.1857231533209185,
      "grad_norm": 20.702796936035156,
      "learning_rate": 9.349807981299049e-06,
      "loss": 0.2557,
      "step": 4400
    },
    {
      "epoch": 2.1857231533209185,
      "eval_loss": 0.3853345811367035,
      "eval_runtime": 558.2923,
      "eval_samples_per_second": 1.947,
      "eval_steps_per_second": 0.974,
      "eval_wer": 0.21022727272727273,
      "step": 4400
    },
    {
      "epoch": 2.1981378026070764,
      "grad_norm": 19.750181198120117,
      "learning_rate": 9.345633661713142e-06,
      "loss": 0.2404,
      "step": 4425
    },
    {
      "epoch": 2.210552451893234,
      "grad_norm": 21.653345108032227,
      "learning_rate": 9.341459342127234e-06,
      "loss": 0.3591,
      "step": 4450
    },
    {
      "epoch": 2.2229671011793917,
      "grad_norm": 17.557680130004883,
      "learning_rate": 9.337285022541327e-06,
      "loss": 0.3089,
      "step": 4475
    },
    {
      "epoch": 2.2353817504655495,
      "grad_norm": 6.009192943572998,
      "learning_rate": 9.333110702955419e-06,
      "loss": 0.3418,
      "step": 4500
    },
    {
      "epoch": 2.247796399751707,
      "grad_norm": 17.482463836669922,
      "learning_rate": 9.32893638336951e-06,
      "loss": 0.2301,
      "step": 4525
    },
    {
      "epoch": 2.260211049037865,
      "grad_norm": 13.825834274291992,
      "learning_rate": 9.324762063783604e-06,
      "loss": 0.3232,
      "step": 4550
    },
    {
      "epoch": 2.2726256983240223,
      "grad_norm": 9.021127700805664,
      "learning_rate": 9.320587744197697e-06,
      "loss": 0.3317,
      "step": 4575
    },
    {
      "epoch": 2.28504034761018,
      "grad_norm": 22.399105072021484,
      "learning_rate": 9.316413424611789e-06,
      "loss": 0.3342,
      "step": 4600
    },
    {
      "epoch": 2.28504034761018,
      "eval_loss": 0.38909900188446045,
      "eval_runtime": 555.7727,
      "eval_samples_per_second": 1.956,
      "eval_steps_per_second": 0.979,
      "eval_wer": 0.20616883116883117,
      "step": 4600
    },
    {
      "epoch": 2.28504034761018,
      "step": 4600,
      "total_flos": 3.756642543599616e+19,
      "train_loss": 1.1144439057681872,
      "train_runtime": 22344.0923,
      "train_samples_per_second": 21.63,
      "train_steps_per_second": 2.703
    }
  ],
  "logging_steps": 25,
  "max_steps": 60390,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 200,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.756642543599616e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}