{
  "best_metric": 0.6916529645033369,
  "best_model_checkpoint": "d:\\DataTicon\\Whisper-Khmer-Small\\whisper-khmer\\outputs\\whisper-khmer-tiny\\checkpoint-3000",
  "epoch": 2.7447392497712717,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009149130832570906,
      "grad_norm": 76.3263931274414,
      "learning_rate": 5e-06,
      "loss": 3.7253,
      "step": 10
    },
    {
      "epoch": 0.018298261665141813,
      "grad_norm": 23.748031616210938,
      "learning_rate": 1.5e-05,
      "loss": 2.8873,
      "step": 20
    },
    {
      "epoch": 0.027447392497712716,
      "grad_norm": 17.664098739624023,
      "learning_rate": 2.5e-05,
      "loss": 2.2375,
      "step": 30
    },
    {
      "epoch": 0.036596523330283626,
      "grad_norm": 12.52354907989502,
      "learning_rate": 3.5e-05,
      "loss": 1.8201,
      "step": 40
    },
    {
      "epoch": 0.04574565416285453,
      "grad_norm": 27.447662353515625,
      "learning_rate": 4.5e-05,
      "loss": 1.6313,
      "step": 50
    },
    {
      "epoch": 0.05489478499542543,
      "grad_norm": 24.413909912109375,
      "learning_rate": 4.992257664911737e-05,
      "loss": 1.6002,
      "step": 60
    },
    {
      "epoch": 0.06404391582799634,
      "grad_norm": 21.101381301879883,
      "learning_rate": 4.976772994735213e-05,
      "loss": 1.5404,
      "step": 70
    },
    {
      "epoch": 0.07319304666056725,
      "grad_norm": 14.289061546325684,
      "learning_rate": 4.961288324558687e-05,
      "loss": 1.4752,
      "step": 80
    },
    {
      "epoch": 0.08234217749313816,
      "grad_norm": 14.361418724060059,
      "learning_rate": 4.945803654382162e-05,
      "loss": 1.4342,
      "step": 90
    },
    {
      "epoch": 0.09149130832570906,
      "grad_norm": 10.036293983459473,
      "learning_rate": 4.930318984205637e-05,
      "loss": 1.3983,
      "step": 100
    },
    {
      "epoch": 0.10064043915827996,
      "grad_norm": 14.588384628295898,
      "learning_rate": 4.9148343140291115e-05,
      "loss": 1.3811,
      "step": 110
    },
    {
      "epoch": 0.10978956999085086,
      "grad_norm": 11.769558906555176,
      "learning_rate": 4.899349643852586e-05,
      "loss": 1.3395,
      "step": 120
    },
    {
      "epoch": 0.11893870082342177,
      "grad_norm": 15.153656959533691,
      "learning_rate": 4.883864973676061e-05,
      "loss": 1.3043,
      "step": 130
    },
    {
      "epoch": 0.1280878316559927,
      "grad_norm": 10.691374778747559,
      "learning_rate": 4.8683803034995354e-05,
      "loss": 1.2971,
      "step": 140
    },
    {
      "epoch": 0.1372369624885636,
      "grad_norm": 18.516103744506836,
      "learning_rate": 4.85289563332301e-05,
      "loss": 1.2716,
      "step": 150
    },
    {
      "epoch": 0.1463860933211345,
      "grad_norm": 10.616347312927246,
      "learning_rate": 4.837410963146485e-05,
      "loss": 1.2456,
      "step": 160
    },
    {
      "epoch": 0.1555352241537054,
      "grad_norm": 14.87991714477539,
      "learning_rate": 4.82192629296996e-05,
      "loss": 1.2224,
      "step": 170
    },
    {
      "epoch": 0.16468435498627632,
      "grad_norm": 19.040470123291016,
      "learning_rate": 4.806441622793435e-05,
      "loss": 1.2364,
      "step": 180
    },
    {
      "epoch": 0.17383348581884722,
      "grad_norm": 18.312976837158203,
      "learning_rate": 4.7909569526169096e-05,
      "loss": 1.1906,
      "step": 190
    },
    {
      "epoch": 0.18298261665141813,
      "grad_norm": 21.12237548828125,
      "learning_rate": 4.7754722824403844e-05,
      "loss": 1.1894,
      "step": 200
    },
    {
      "epoch": 0.19213174748398903,
      "grad_norm": 8.743210792541504,
      "learning_rate": 4.759987612263859e-05,
      "loss": 1.1648,
      "step": 210
    },
    {
      "epoch": 0.2012808783165599,
      "grad_norm": 11.74830150604248,
      "learning_rate": 4.744502942087334e-05,
      "loss": 1.1461,
      "step": 220
    },
    {
      "epoch": 0.21043000914913082,
      "grad_norm": 15.216257095336914,
      "learning_rate": 4.729018271910808e-05,
      "loss": 1.1189,
      "step": 230
    },
    {
      "epoch": 0.21957913998170173,
      "grad_norm": 17.434612274169922,
      "learning_rate": 4.713533601734284e-05,
      "loss": 1.126,
      "step": 240
    },
    {
      "epoch": 0.22872827081427263,
      "grad_norm": 11.15873908996582,
      "learning_rate": 4.698048931557758e-05,
      "loss": 1.0959,
      "step": 250
    },
    {
      "epoch": 0.23787740164684354,
      "grad_norm": 13.587617874145508,
      "learning_rate": 4.682564261381233e-05,
      "loss": 1.0969,
      "step": 260
    },
    {
      "epoch": 0.24702653247941445,
      "grad_norm": 11.1122407913208,
      "learning_rate": 4.6670795912047076e-05,
      "loss": 1.0839,
      "step": 270
    },
    {
      "epoch": 0.2561756633119854,
      "grad_norm": 16.767852783203125,
      "learning_rate": 4.6515949210281825e-05,
      "loss": 1.053,
      "step": 280
    },
    {
      "epoch": 0.2653247941445563,
      "grad_norm": 14.137765884399414,
      "learning_rate": 4.6361102508516566e-05,
      "loss": 1.0309,
      "step": 290
    },
    {
      "epoch": 0.2744739249771272,
      "grad_norm": 13.212748527526855,
      "learning_rate": 4.620625580675132e-05,
      "loss": 0.9809,
      "step": 300
    },
    {
      "epoch": 0.2836230558096981,
      "grad_norm": 12.678833961486816,
      "learning_rate": 4.605140910498606e-05,
      "loss": 0.9719,
      "step": 310
    },
    {
      "epoch": 0.292772186642269,
      "grad_norm": 11.58700180053711,
      "learning_rate": 4.589656240322082e-05,
      "loss": 0.9258,
      "step": 320
    },
    {
      "epoch": 0.3019213174748399,
      "grad_norm": 10.919293403625488,
      "learning_rate": 4.574171570145556e-05,
      "loss": 0.8796,
      "step": 330
    },
    {
      "epoch": 0.3110704483074108,
      "grad_norm": 10.442853927612305,
      "learning_rate": 4.558686899969031e-05,
      "loss": 0.8912,
      "step": 340
    },
    {
      "epoch": 0.3202195791399817,
      "grad_norm": 11.729557991027832,
      "learning_rate": 4.543202229792506e-05,
      "loss": 0.8275,
      "step": 350
    },
    {
      "epoch": 0.32936870997255263,
      "grad_norm": 9.217303276062012,
      "learning_rate": 4.5277175596159805e-05,
      "loss": 0.7889,
      "step": 360
    },
    {
      "epoch": 0.33851784080512354,
      "grad_norm": 13.999395370483398,
      "learning_rate": 4.512232889439455e-05,
      "loss": 0.736,
      "step": 370
    },
    {
      "epoch": 0.34766697163769444,
      "grad_norm": 18.503355026245117,
      "learning_rate": 4.49674821926293e-05,
      "loss": 0.7104,
      "step": 380
    },
    {
      "epoch": 0.35681610247026535,
      "grad_norm": 11.04101848602295,
      "learning_rate": 4.4812635490864044e-05,
      "loss": 0.7163,
      "step": 390
    },
    {
      "epoch": 0.36596523330283626,
      "grad_norm": 9.643781661987305,
      "learning_rate": 4.465778878909879e-05,
      "loss": 0.6944,
      "step": 400
    },
    {
      "epoch": 0.37511436413540716,
      "grad_norm": 8.121737480163574,
      "learning_rate": 4.450294208733354e-05,
      "loss": 0.6771,
      "step": 410
    },
    {
      "epoch": 0.38426349496797807,
      "grad_norm": 12.704200744628906,
      "learning_rate": 4.434809538556829e-05,
      "loss": 0.6185,
      "step": 420
    },
    {
      "epoch": 0.3934126258005489,
      "grad_norm": 8.623883247375488,
      "learning_rate": 4.419324868380304e-05,
      "loss": 0.6471,
      "step": 430
    },
    {
      "epoch": 0.4025617566331198,
      "grad_norm": 9.99401569366455,
      "learning_rate": 4.4038401982037786e-05,
      "loss": 0.6112,
      "step": 440
    },
    {
      "epoch": 0.41171088746569073,
      "grad_norm": 9.302281379699707,
      "learning_rate": 4.3883555280272534e-05,
      "loss": 0.6054,
      "step": 450
    },
    {
      "epoch": 0.42086001829826164,
      "grad_norm": 12.407485961914062,
      "learning_rate": 4.372870857850728e-05,
      "loss": 0.6045,
      "step": 460
    },
    {
      "epoch": 0.43000914913083255,
      "grad_norm": 11.521641731262207,
      "learning_rate": 4.357386187674203e-05,
      "loss": 0.5927,
      "step": 470
    },
    {
      "epoch": 0.43915827996340345,
      "grad_norm": 18.213590621948242,
      "learning_rate": 4.341901517497677e-05,
      "loss": 0.5654,
      "step": 480
    },
    {
      "epoch": 0.44830741079597436,
      "grad_norm": 9.964733123779297,
      "learning_rate": 4.326416847321153e-05,
      "loss": 0.5561,
      "step": 490
    },
    {
      "epoch": 0.45745654162854527,
      "grad_norm": 10.646913528442383,
      "learning_rate": 4.310932177144627e-05,
      "loss": 0.5174,
      "step": 500
    },
    {
      "epoch": 0.45745654162854527,
      "eval_loss": 0.5902902483940125,
      "eval_runtime": 436.5208,
      "eval_samples_per_second": 1.766,
      "eval_steps_per_second": 0.112,
      "eval_wer": 0.9019247509430313,
      "step": 500
    },
    {
      "epoch": 0.46660567246111617,
      "grad_norm": 7.100905895233154,
      "learning_rate": 4.295447506968102e-05,
      "loss": 0.5449,
      "step": 510
    },
    {
      "epoch": 0.4757548032936871,
      "grad_norm": 14.554773330688477,
      "learning_rate": 4.2799628367915767e-05,
      "loss": 0.5277,
      "step": 520
    },
    {
      "epoch": 0.484903934126258,
      "grad_norm": 12.082781791687012,
      "learning_rate": 4.2644781666150515e-05,
      "loss": 0.5299,
      "step": 530
    },
    {
      "epoch": 0.4940530649588289,
      "grad_norm": 8.607912063598633,
      "learning_rate": 4.2489934964385257e-05,
      "loss": 0.5395,
      "step": 540
    },
    {
      "epoch": 0.5032021957913998,
      "grad_norm": 9.982528686523438,
      "learning_rate": 4.233508826262001e-05,
      "loss": 0.5197,
      "step": 550
    },
    {
      "epoch": 0.5123513266239708,
      "grad_norm": 12.866645812988281,
      "learning_rate": 4.2180241560854753e-05,
      "loss": 0.4857,
      "step": 560
    },
    {
      "epoch": 0.5215004574565416,
      "grad_norm": 9.12654972076416,
      "learning_rate": 4.20253948590895e-05,
      "loss": 0.4852,
      "step": 570
    },
    {
      "epoch": 0.5306495882891126,
      "grad_norm": 7.0818705558776855,
      "learning_rate": 4.187054815732425e-05,
      "loss": 0.5013,
      "step": 580
    },
    {
      "epoch": 0.5397987191216834,
      "grad_norm": 9.520069122314453,
      "learning_rate": 4.1715701455559e-05,
      "loss": 0.4656,
      "step": 590
    },
    {
      "epoch": 0.5489478499542544,
      "grad_norm": 8.271717071533203,
      "learning_rate": 4.156085475379375e-05,
      "loss": 0.4866,
      "step": 600
    },
    {
      "epoch": 0.5580969807868252,
      "grad_norm": 9.679398536682129,
      "learning_rate": 4.1406008052028496e-05,
      "loss": 0.5045,
      "step": 610
    },
    {
      "epoch": 0.5672461116193962,
      "grad_norm": 9.209792137145996,
      "learning_rate": 4.125116135026324e-05,
      "loss": 0.435,
      "step": 620
    },
    {
      "epoch": 0.576395242451967,
      "grad_norm": 7.2256669998168945,
      "learning_rate": 4.109631464849799e-05,
      "loss": 0.4575,
      "step": 630
    },
    {
      "epoch": 0.585544373284538,
      "grad_norm": 7.8047990798950195,
      "learning_rate": 4.0941467946732734e-05,
      "loss": 0.4598,
      "step": 640
    },
    {
      "epoch": 0.5946935041171089,
      "grad_norm": 7.035597801208496,
      "learning_rate": 4.078662124496748e-05,
      "loss": 0.4714,
      "step": 650
    },
    {
      "epoch": 0.6038426349496798,
      "grad_norm": 7.996973514556885,
      "learning_rate": 4.063177454320223e-05,
      "loss": 0.4596,
      "step": 660
    },
    {
      "epoch": 0.6129917657822507,
      "grad_norm": 6.872828960418701,
      "learning_rate": 4.047692784143698e-05,
      "loss": 0.4106,
      "step": 670
    },
    {
      "epoch": 0.6221408966148216,
      "grad_norm": 6.923854827880859,
      "learning_rate": 4.032208113967173e-05,
      "loss": 0.42,
      "step": 680
    },
    {
      "epoch": 0.6312900274473925,
      "grad_norm": 12.70057487487793,
      "learning_rate": 4.0167234437906476e-05,
      "loss": 0.4229,
      "step": 690
    },
    {
      "epoch": 0.6404391582799634,
      "grad_norm": 6.876515865325928,
      "learning_rate": 4.0012387736141224e-05,
      "loss": 0.4114,
      "step": 700
    },
    {
      "epoch": 0.6495882891125343,
      "grad_norm": 7.49954891204834,
      "learning_rate": 3.9857541034375966e-05,
      "loss": 0.4163,
      "step": 710
    },
    {
      "epoch": 0.6587374199451053,
      "grad_norm": 6.375706672668457,
      "learning_rate": 3.970269433261072e-05,
      "loss": 0.4067,
      "step": 720
    },
    {
      "epoch": 0.6678865507776761,
      "grad_norm": 5.803896427154541,
      "learning_rate": 3.954784763084546e-05,
      "loss": 0.4269,
      "step": 730
    },
    {
      "epoch": 0.6770356816102471,
      "grad_norm": 9.036760330200195,
      "learning_rate": 3.939300092908022e-05,
      "loss": 0.4117,
      "step": 740
    },
    {
      "epoch": 0.6861848124428179,
      "grad_norm": 6.481241226196289,
      "learning_rate": 3.923815422731496e-05,
      "loss": 0.4561,
      "step": 750
    },
    {
      "epoch": 0.6953339432753889,
      "grad_norm": 7.707711219787598,
      "learning_rate": 3.908330752554971e-05,
      "loss": 0.4075,
      "step": 760
    },
    {
      "epoch": 0.7044830741079597,
      "grad_norm": 6.894267559051514,
      "learning_rate": 3.892846082378446e-05,
      "loss": 0.4036,
      "step": 770
    },
    {
      "epoch": 0.7136322049405307,
      "grad_norm": 6.747013568878174,
      "learning_rate": 3.8773614122019205e-05,
      "loss": 0.4139,
      "step": 780
    },
    {
      "epoch": 0.7227813357731016,
      "grad_norm": 8.749561309814453,
      "learning_rate": 3.861876742025395e-05,
      "loss": 0.391,
      "step": 790
    },
    {
      "epoch": 0.7319304666056725,
      "grad_norm": 6.197606086730957,
      "learning_rate": 3.84639207184887e-05,
      "loss": 0.4115,
      "step": 800
    },
    {
      "epoch": 0.7410795974382434,
      "grad_norm": 6.012449264526367,
      "learning_rate": 3.8309074016723444e-05,
      "loss": 0.43,
      "step": 810
    },
    {
      "epoch": 0.7502287282708143,
      "grad_norm": 9.235795021057129,
      "learning_rate": 3.815422731495819e-05,
      "loss": 0.4013,
      "step": 820
    },
    {
      "epoch": 0.7593778591033852,
      "grad_norm": 6.508467197418213,
      "learning_rate": 3.799938061319294e-05,
      "loss": 0.4084,
      "step": 830
    },
    {
      "epoch": 0.7685269899359561,
      "grad_norm": 12.164517402648926,
      "learning_rate": 3.784453391142769e-05,
      "loss": 0.422,
      "step": 840
    },
    {
      "epoch": 0.777676120768527,
      "grad_norm": 6.47005033493042,
      "learning_rate": 3.768968720966244e-05,
      "loss": 0.3806,
      "step": 850
    },
    {
      "epoch": 0.7868252516010978,
      "grad_norm": 6.4245476722717285,
      "learning_rate": 3.7534840507897186e-05,
      "loss": 0.3772,
      "step": 860
    },
    {
      "epoch": 0.7959743824336688,
      "grad_norm": 6.941617965698242,
      "learning_rate": 3.737999380613193e-05,
      "loss": 0.3621,
      "step": 870
    },
    {
      "epoch": 0.8051235132662397,
      "grad_norm": 6.679232120513916,
      "learning_rate": 3.722514710436668e-05,
      "loss": 0.3699,
      "step": 880
    },
    {
      "epoch": 0.8142726440988106,
      "grad_norm": 7.287721157073975,
      "learning_rate": 3.7070300402601424e-05,
      "loss": 0.3728,
      "step": 890
    },
    {
      "epoch": 0.8234217749313815,
      "grad_norm": 7.297004699707031,
      "learning_rate": 3.691545370083617e-05,
      "loss": 0.3823,
      "step": 900
    },
    {
      "epoch": 0.8325709057639524,
      "grad_norm": 5.730973720550537,
      "learning_rate": 3.676060699907092e-05,
      "loss": 0.3716,
      "step": 910
    },
    {
      "epoch": 0.8417200365965233,
      "grad_norm": 8.157340049743652,
      "learning_rate": 3.660576029730567e-05,
      "loss": 0.3731,
      "step": 920
    },
    {
      "epoch": 0.8508691674290942,
      "grad_norm": 8.863473892211914,
      "learning_rate": 3.645091359554042e-05,
      "loss": 0.3445,
      "step": 930
    },
    {
      "epoch": 0.8600182982616651,
      "grad_norm": 5.911675453186035,
      "learning_rate": 3.6296066893775166e-05,
      "loss": 0.3671,
      "step": 940
    },
    {
      "epoch": 0.869167429094236,
      "grad_norm": 6.246954441070557,
      "learning_rate": 3.6141220192009915e-05,
      "loss": 0.3876,
      "step": 950
    },
    {
      "epoch": 0.8783165599268069,
      "grad_norm": 4.594511032104492,
      "learning_rate": 3.5986373490244656e-05,
      "loss": 0.3637,
      "step": 960
    },
    {
      "epoch": 0.8874656907593779,
      "grad_norm": 7.323066234588623,
      "learning_rate": 3.583152678847941e-05,
      "loss": 0.3624,
      "step": 970
    },
    {
      "epoch": 0.8966148215919487,
      "grad_norm": 6.408933639526367,
      "learning_rate": 3.567668008671415e-05,
      "loss": 0.3496,
      "step": 980
    },
    {
      "epoch": 0.9057639524245197,
      "grad_norm": 5.430429935455322,
      "learning_rate": 3.552183338494891e-05,
      "loss": 0.368,
      "step": 990
    },
    {
      "epoch": 0.9149130832570905,
      "grad_norm": 7.088529109954834,
      "learning_rate": 3.536698668318365e-05,
      "loss": 0.3655,
      "step": 1000
    },
    {
      "epoch": 0.9149130832570905,
      "eval_loss": 0.3907645046710968,
      "eval_runtime": 424.8249,
      "eval_samples_per_second": 1.815,
      "eval_steps_per_second": 0.115,
      "eval_wer": 0.8130380114130961,
      "step": 1000
    },
    {
      "epoch": 0.9240622140896615,
      "grad_norm": 5.1332292556762695,
      "learning_rate": 3.52121399814184e-05,
      "loss": 0.3153,
      "step": 1010
    },
    {
      "epoch": 0.9332113449222323,
      "grad_norm": 5.477539539337158,
      "learning_rate": 3.505729327965315e-05,
      "loss": 0.3383,
      "step": 1020
    },
    {
      "epoch": 0.9423604757548033,
      "grad_norm": 6.7095866203308105,
      "learning_rate": 3.4902446577887895e-05,
      "loss": 0.3351,
      "step": 1030
    },
    {
      "epoch": 0.9515096065873742,
      "grad_norm": 4.704165935516357,
      "learning_rate": 3.474759987612264e-05,
      "loss": 0.3144,
      "step": 1040
    },
    {
      "epoch": 0.9606587374199451,
      "grad_norm": 9.83104419708252,
      "learning_rate": 3.459275317435739e-05,
      "loss": 0.3667,
      "step": 1050
    },
    {
      "epoch": 0.969807868252516,
      "grad_norm": 6.172043323516846,
      "learning_rate": 3.4437906472592134e-05,
      "loss": 0.3298,
      "step": 1060
    },
    {
      "epoch": 0.9789569990850869,
      "grad_norm": 6.027336597442627,
      "learning_rate": 3.428305977082688e-05,
      "loss": 0.3269,
      "step": 1070
    },
    {
      "epoch": 0.9881061299176578,
      "grad_norm": 6.435912132263184,
      "learning_rate": 3.412821306906163e-05,
      "loss": 0.3203,
      "step": 1080
    },
    {
      "epoch": 0.9972552607502287,
      "grad_norm": 7.0265913009643555,
      "learning_rate": 3.397336636729638e-05,
      "loss": 0.3413,
      "step": 1090
    },
    {
      "epoch": 1.0064043915827996,
      "grad_norm": 6.904513835906982,
      "learning_rate": 3.381851966553112e-05,
      "loss": 0.3164,
      "step": 1100
    },
    {
      "epoch": 1.0155535224153704,
      "grad_norm": 5.236996173858643,
      "learning_rate": 3.3663672963765876e-05,
      "loss": 0.2675,
      "step": 1110
    },
    {
      "epoch": 1.0247026532479415,
      "grad_norm": 6.988259792327881,
      "learning_rate": 3.350882626200062e-05,
      "loss": 0.3134,
      "step": 1120
    },
    {
      "epoch": 1.0338517840805124,
      "grad_norm": 8.87595272064209,
      "learning_rate": 3.335397956023537e-05,
      "loss": 0.3065,
      "step": 1130
    },
    {
      "epoch": 1.0430009149130832,
      "grad_norm": 7.2589287757873535,
      "learning_rate": 3.3199132858470114e-05,
      "loss": 0.2798,
      "step": 1140
    },
    {
      "epoch": 1.052150045745654,
      "grad_norm": 7.233737945556641,
      "learning_rate": 3.304428615670486e-05,
      "loss": 0.2954,
      "step": 1150
    },
    {
      "epoch": 1.0612991765782251,
      "grad_norm": 4.9386887550354,
      "learning_rate": 3.288943945493961e-05,
      "loss": 0.2959,
      "step": 1160
    },
    {
      "epoch": 1.070448307410796,
      "grad_norm": 6.335395812988281,
      "learning_rate": 3.273459275317436e-05,
      "loss": 0.295,
      "step": 1170
    },
    {
      "epoch": 1.0795974382433668,
      "grad_norm": 6.33104944229126,
      "learning_rate": 3.257974605140911e-05,
      "loss": 0.2997,
      "step": 1180
    },
    {
      "epoch": 1.0887465690759377,
      "grad_norm": 5.694860458374023,
      "learning_rate": 3.2424899349643856e-05,
      "loss": 0.262,
      "step": 1190
    },
    {
      "epoch": 1.0978956999085088,
      "grad_norm": 5.777647018432617,
      "learning_rate": 3.2270052647878605e-05,
      "loss": 0.2822,
      "step": 1200
    },
    {
      "epoch": 1.1070448307410796,
      "grad_norm": 8.250167846679688,
      "learning_rate": 3.2115205946113346e-05,
      "loss": 0.2778,
      "step": 1210
    },
    {
      "epoch": 1.1161939615736505,
      "grad_norm": 4.275432586669922,
      "learning_rate": 3.19603592443481e-05,
      "loss": 0.2793,
      "step": 1220
    },
    {
      "epoch": 1.1253430924062213,
      "grad_norm": 4.831576824188232,
      "learning_rate": 3.180551254258284e-05,
      "loss": 0.2815,
      "step": 1230
    },
    {
      "epoch": 1.1344922232387924,
      "grad_norm": 5.6868720054626465,
      "learning_rate": 3.165066584081759e-05,
      "loss": 0.3085,
      "step": 1240
    },
    {
      "epoch": 1.1436413540713632,
      "grad_norm": 5.878891944885254,
      "learning_rate": 3.149581913905234e-05,
      "loss": 0.2543,
      "step": 1250
    },
    {
      "epoch": 1.152790484903934,
      "grad_norm": 5.85615348815918,
      "learning_rate": 3.134097243728709e-05,
      "loss": 0.2538,
      "step": 1260
    },
    {
      "epoch": 1.161939615736505,
      "grad_norm": 4.6179118156433105,
      "learning_rate": 3.118612573552184e-05,
      "loss": 0.2565,
      "step": 1270
    },
    {
      "epoch": 1.171088746569076,
      "grad_norm": 5.257189750671387,
      "learning_rate": 3.1031279033756585e-05,
      "loss": 0.2681,
      "step": 1280
    },
    {
      "epoch": 1.1802378774016469,
      "grad_norm": 4.76942253112793,
      "learning_rate": 3.087643233199133e-05,
      "loss": 0.2826,
      "step": 1290
    },
    {
      "epoch": 1.1893870082342177,
      "grad_norm": 5.82953405380249,
      "learning_rate": 3.072158563022608e-05,
      "loss": 0.2826,
      "step": 1300
    },
    {
      "epoch": 1.1985361390667886,
      "grad_norm": 5.2305731773376465,
      "learning_rate": 3.0566738928460824e-05,
      "loss": 0.2598,
      "step": 1310
    },
    {
      "epoch": 1.2076852698993596,
      "grad_norm": 5.51474666595459,
      "learning_rate": 3.0411892226695572e-05,
      "loss": 0.2685,
      "step": 1320
    },
    {
      "epoch": 1.2168344007319305,
      "grad_norm": 7.23142147064209,
      "learning_rate": 3.025704552493032e-05,
      "loss": 0.285,
      "step": 1330
    },
    {
      "epoch": 1.2259835315645013,
      "grad_norm": 5.186690807342529,
      "learning_rate": 3.010219882316507e-05,
      "loss": 0.2872,
      "step": 1340
    },
    {
      "epoch": 1.2351326623970722,
      "grad_norm": 5.723147392272949,
      "learning_rate": 2.9947352121399814e-05,
      "loss": 0.2631,
      "step": 1350
    },
    {
      "epoch": 1.2442817932296433,
      "grad_norm": 4.612165451049805,
      "learning_rate": 2.9792505419634566e-05,
      "loss": 0.2966,
      "step": 1360
    },
    {
      "epoch": 1.2534309240622141,
      "grad_norm": 5.467476844787598,
      "learning_rate": 2.963765871786931e-05,
      "loss": 0.2308,
      "step": 1370
    },
    {
      "epoch": 1.262580054894785,
      "grad_norm": 4.7134785652160645,
      "learning_rate": 2.9482812016104063e-05,
      "loss": 0.2781,
      "step": 1380
    },
    {
      "epoch": 1.2717291857273558,
      "grad_norm": 4.138732433319092,
      "learning_rate": 2.9327965314338808e-05,
      "loss": 0.2452,
      "step": 1390
    },
    {
      "epoch": 1.2808783165599267,
      "grad_norm": 4.39865255355835,
      "learning_rate": 2.9173118612573553e-05,
      "loss": 0.2759,
      "step": 1400
    },
    {
      "epoch": 1.2900274473924978,
      "grad_norm": 6.269981384277344,
      "learning_rate": 2.9018271910808305e-05,
      "loss": 0.2802,
      "step": 1410
    },
    {
      "epoch": 1.2991765782250686,
      "grad_norm": 5.472837924957275,
      "learning_rate": 2.886342520904305e-05,
      "loss": 0.282,
      "step": 1420
    },
    {
      "epoch": 1.3083257090576395,
      "grad_norm": 5.290619850158691,
      "learning_rate": 2.8708578507277795e-05,
      "loss": 0.2443,
      "step": 1430
    },
    {
      "epoch": 1.3174748398902105,
      "grad_norm": 4.903107643127441,
      "learning_rate": 2.8553731805512546e-05,
      "loss": 0.255,
      "step": 1440
    },
    {
      "epoch": 1.3266239707227814,
      "grad_norm": 5.144070625305176,
      "learning_rate": 2.839888510374729e-05,
      "loss": 0.2375,
      "step": 1450
    },
    {
      "epoch": 1.3357731015553522,
      "grad_norm": 4.945043087005615,
      "learning_rate": 2.8244038401982036e-05,
      "loss": 0.2381,
      "step": 1460
    },
    {
      "epoch": 1.344922232387923,
      "grad_norm": 5.670736789703369,
      "learning_rate": 2.8089191700216788e-05,
      "loss": 0.2398,
      "step": 1470
    },
    {
      "epoch": 1.354071363220494,
      "grad_norm": 5.526036739349365,
      "learning_rate": 2.7934344998451533e-05,
      "loss": 0.2748,
      "step": 1480
    },
    {
      "epoch": 1.363220494053065,
      "grad_norm": 4.805148601531982,
      "learning_rate": 2.7779498296686278e-05,
      "loss": 0.2412,
      "step": 1490
    },
    {
      "epoch": 1.3723696248856359,
      "grad_norm": 4.122767925262451,
      "learning_rate": 2.762465159492103e-05,
      "loss": 0.2805,
      "step": 1500
    },
    {
      "epoch": 1.3723696248856359,
      "eval_loss": 0.33601683378219604,
      "eval_runtime": 432.4301,
      "eval_samples_per_second": 1.783,
      "eval_steps_per_second": 0.113,
      "eval_wer": 0.7586807234742238,
      "step": 1500
    },
    {
      "epoch": 1.3815187557182067,
      "grad_norm": 4.068643569946289,
      "learning_rate": 2.7469804893155775e-05,
      "loss": 0.2527,
      "step": 1510
    },
    {
      "epoch": 1.3906678865507778,
      "grad_norm": 5.818108081817627,
      "learning_rate": 2.7314958191390527e-05,
      "loss": 0.2707,
      "step": 1520
    },
    {
      "epoch": 1.3998170173833486,
      "grad_norm": 6.448596477508545,
      "learning_rate": 2.7160111489625272e-05,
      "loss": 0.2466,
      "step": 1530
    },
    {
      "epoch": 1.4089661482159195,
      "grad_norm": 6.120127201080322,
      "learning_rate": 2.7005264787860017e-05,
      "loss": 0.259,
      "step": 1540
    },
    {
      "epoch": 1.4181152790484903,
      "grad_norm": 4.396270751953125,
      "learning_rate": 2.685041808609477e-05,
      "loss": 0.2505,
      "step": 1550
    },
    {
      "epoch": 1.4272644098810612,
      "grad_norm": 3.8976686000823975,
      "learning_rate": 2.6695571384329514e-05,
      "loss": 0.2429,
      "step": 1560
    },
    {
      "epoch": 1.4364135407136323,
      "grad_norm": 4.241589069366455,
      "learning_rate": 2.6540724682564262e-05,
      "loss": 0.2424,
      "step": 1570
    },
    {
      "epoch": 1.445562671546203,
      "grad_norm": 6.113090515136719,
      "learning_rate": 2.638587798079901e-05,
      "loss": 0.2642,
      "step": 1580
    },
    {
      "epoch": 1.454711802378774,
      "grad_norm": 4.122611999511719,
      "learning_rate": 2.623103127903376e-05,
      "loss": 0.2259,
      "step": 1590
    },
    {
      "epoch": 1.463860933211345,
      "grad_norm": 4.869472026824951,
      "learning_rate": 2.6076184577268504e-05,
      "loss": 0.2542,
      "step": 1600
    },
    {
      "epoch": 1.4730100640439159,
      "grad_norm": 4.926369667053223,
      "learning_rate": 2.5921337875503256e-05,
      "loss": 0.2789,
      "step": 1610
    },
    {
      "epoch": 1.4821591948764867,
      "grad_norm": 5.319028854370117,
      "learning_rate": 2.5766491173738e-05,
      "loss": 0.265,
      "step": 1620
    },
    {
      "epoch": 1.4913083257090576,
      "grad_norm": 6.620922088623047,
      "learning_rate": 2.5611644471972746e-05,
      "loss": 0.2442,
      "step": 1630
    },
    {
      "epoch": 1.5004574565416284,
      "grad_norm": 5.458837032318115,
      "learning_rate": 2.5456797770207498e-05,
      "loss": 0.252,
      "step": 1640
    },
    {
      "epoch": 1.5096065873741995,
      "grad_norm": 5.415153503417969,
      "learning_rate": 2.5301951068442243e-05,
      "loss": 0.237,
      "step": 1650
    },
    {
      "epoch": 1.5187557182067704,
      "grad_norm": 4.952278137207031,
      "learning_rate": 2.5147104366676995e-05,
      "loss": 0.2577,
      "step": 1660
    },
    {
      "epoch": 1.5279048490393414,
      "grad_norm": 4.834970951080322,
      "learning_rate": 2.499225766491174e-05,
      "loss": 0.2452,
      "step": 1670
    },
    {
      "epoch": 1.5370539798719123,
      "grad_norm": 5.410050392150879,
      "learning_rate": 2.4837410963146488e-05,
      "loss": 0.2258,
      "step": 1680
    },
    {
      "epoch": 1.5462031107044831,
      "grad_norm": 4.104517936706543,
      "learning_rate": 2.4682564261381233e-05,
      "loss": 0.229,
      "step": 1690
    },
    {
      "epoch": 1.555352241537054,
      "grad_norm": 4.475819110870361,
      "learning_rate": 2.452771755961598e-05,
      "loss": 0.2589,
      "step": 1700
    },
    {
      "epoch": 1.5645013723696248,
      "grad_norm": 3.8395609855651855,
      "learning_rate": 2.437287085785073e-05,
      "loss": 0.2269,
      "step": 1710
    },
    {
      "epoch": 1.5736505032021957,
      "grad_norm": 4.9355621337890625,
      "learning_rate": 2.4218024156085475e-05,
      "loss": 0.2625,
      "step": 1720
    },
    {
      "epoch": 1.5827996340347665,
      "grad_norm": 4.053934097290039,
      "learning_rate": 2.4063177454320223e-05,
      "loss": 0.2559,
      "step": 1730
    },
    {
      "epoch": 1.5919487648673376,
      "grad_norm": 5.001983642578125,
      "learning_rate": 2.3908330752554972e-05,
      "loss": 0.23,
      "step": 1740
    },
    {
      "epoch": 1.6010978956999085,
      "grad_norm": 5.705740928649902,
      "learning_rate": 2.375348405078972e-05,
      "loss": 0.2173,
      "step": 1750
    },
    {
      "epoch": 1.6102470265324795,
      "grad_norm": 4.854909420013428,
      "learning_rate": 2.3598637349024465e-05,
      "loss": 0.2297,
      "step": 1760
    },
    {
      "epoch": 1.6193961573650504,
      "grad_norm": 3.785277843475342,
      "learning_rate": 2.3443790647259214e-05,
      "loss": 0.2065,
      "step": 1770
    },
    {
      "epoch": 1.6285452881976212,
      "grad_norm": 5.307765960693359,
      "learning_rate": 2.3288943945493962e-05,
      "loss": 0.2246,
      "step": 1780
    },
    {
      "epoch": 1.637694419030192,
      "grad_norm": 5.032717704772949,
      "learning_rate": 2.3134097243728707e-05,
      "loss": 0.2168,
      "step": 1790
    },
    {
      "epoch": 1.646843549862763,
      "grad_norm": 4.665537357330322,
      "learning_rate": 2.2979250541963456e-05,
      "loss": 0.2409,
      "step": 1800
    },
    {
      "epoch": 1.6559926806953338,
      "grad_norm": 4.126980304718018,
      "learning_rate": 2.2824403840198204e-05,
      "loss": 0.2397,
      "step": 1810
    },
    {
      "epoch": 1.6651418115279049,
      "grad_norm": 5.973440170288086,
      "learning_rate": 2.2669557138432952e-05,
      "loss": 0.2654,
      "step": 1820
    },
    {
      "epoch": 1.6742909423604757,
      "grad_norm": 4.972531795501709,
      "learning_rate": 2.25147104366677e-05,
      "loss": 0.2636,
      "step": 1830
    },
    {
      "epoch": 1.6834400731930468,
      "grad_norm": 6.962503910064697,
      "learning_rate": 2.235986373490245e-05,
      "loss": 0.2629,
      "step": 1840
    },
    {
      "epoch": 1.6925892040256176,
      "grad_norm": 4.002923488616943,
      "learning_rate": 2.2205017033137198e-05,
      "loss": 0.2333,
      "step": 1850
    },
    {
      "epoch": 1.7017383348581885,
      "grad_norm": 5.305150985717773,
      "learning_rate": 2.2050170331371943e-05,
      "loss": 0.2535,
      "step": 1860
    },
    {
      "epoch": 1.7108874656907593,
      "grad_norm": 4.577486038208008,
      "learning_rate": 2.189532362960669e-05,
      "loss": 0.2307,
      "step": 1870
    },
    {
      "epoch": 1.7200365965233302,
      "grad_norm": 4.220026016235352,
      "learning_rate": 2.174047692784144e-05,
      "loss": 0.2461,
      "step": 1880
    },
    {
      "epoch": 1.729185727355901,
      "grad_norm": 5.4357428550720215,
      "learning_rate": 2.1585630226076188e-05,
      "loss": 0.2297,
      "step": 1890
    },
    {
      "epoch": 1.738334858188472,
      "grad_norm": 5.218511581420898,
      "learning_rate": 2.1430783524310933e-05,
      "loss": 0.2419,
      "step": 1900
    },
    {
      "epoch": 1.747483989021043,
      "grad_norm": 6.166689395904541,
      "learning_rate": 2.127593682254568e-05,
      "loss": 0.2471,
      "step": 1910
    },
    {
      "epoch": 1.756633119853614,
      "grad_norm": 5.226531982421875,
      "learning_rate": 2.112109012078043e-05,
      "loss": 0.238,
      "step": 1920
    },
    {
      "epoch": 1.7657822506861849,
      "grad_norm": 6.10182523727417,
      "learning_rate": 2.0966243419015175e-05,
      "loss": 0.2654,
      "step": 1930
    },
    {
      "epoch": 1.7749313815187557,
      "grad_norm": 4.4128737449646,
      "learning_rate": 2.0811396717249923e-05,
      "loss": 0.23,
      "step": 1940
    },
    {
      "epoch": 1.7840805123513266,
      "grad_norm": 4.541961193084717,
      "learning_rate": 2.065655001548467e-05,
      "loss": 0.2067,
      "step": 1950
    },
    {
      "epoch": 1.7932296431838974,
      "grad_norm": 8.150908470153809,
      "learning_rate": 2.050170331371942e-05,
      "loss": 0.224,
      "step": 1960
    },
    {
      "epoch": 1.8023787740164683,
      "grad_norm": 4.411103248596191,
      "learning_rate": 2.0346856611954165e-05,
      "loss": 0.2244,
      "step": 1970
    },
    {
      "epoch": 1.8115279048490394,
      "grad_norm": 4.345833778381348,
      "learning_rate": 2.0192009910188913e-05,
      "loss": 0.225,
      "step": 1980
    },
    {
      "epoch": 1.8206770356816102,
      "grad_norm": 4.550020694732666,
      "learning_rate": 2.0037163208423662e-05,
      "loss": 0.2406,
      "step": 1990
    },
    {
      "epoch": 1.8298261665141813,
      "grad_norm": 3.8560264110565186,
      "learning_rate": 1.988231650665841e-05,
      "loss": 0.2461,
      "step": 2000
    },
    {
      "epoch": 1.8298261665141813,
      "eval_loss": 0.29912057518959045,
      "eval_runtime": 422.9004,
      "eval_samples_per_second": 1.823,
      "eval_steps_per_second": 0.116,
      "eval_wer": 0.7281651997291808,
      "step": 2000
    },
    {
      "epoch": 1.8389752973467521,
      "grad_norm": 5.020371437072754,
      "learning_rate": 1.9727469804893155e-05,
      "loss": 0.2257,
      "step": 2010
    },
    {
      "epoch": 1.848124428179323,
      "grad_norm": 6.07639741897583,
      "learning_rate": 1.9572623103127904e-05,
      "loss": 0.2146,
      "step": 2020
    },
    {
      "epoch": 1.8572735590118938,
      "grad_norm": 5.103982925415039,
      "learning_rate": 1.9417776401362652e-05,
      "loss": 0.2248,
      "step": 2030
    },
    {
      "epoch": 1.8664226898444647,
      "grad_norm": 5.3223042488098145,
      "learning_rate": 1.9262929699597397e-05,
      "loss": 0.2162,
      "step": 2040
    },
    {
      "epoch": 1.8755718206770355,
      "grad_norm": 4.5631103515625,
      "learning_rate": 1.9108082997832146e-05,
      "loss": 0.2279,
      "step": 2050
    },
    {
      "epoch": 1.8847209515096066,
      "grad_norm": 4.72071647644043,
      "learning_rate": 1.8953236296066894e-05,
      "loss": 0.251,
      "step": 2060
    },
    {
      "epoch": 1.8938700823421775,
      "grad_norm": 4.969239234924316,
      "learning_rate": 1.8798389594301642e-05,
      "loss": 0.2172,
      "step": 2070
    },
    {
      "epoch": 1.9030192131747485,
      "grad_norm": 4.407639026641846,
      "learning_rate": 1.864354289253639e-05,
      "loss": 0.2098,
      "step": 2080
    },
    {
      "epoch": 1.9121683440073194,
      "grad_norm": 3.3802950382232666,
      "learning_rate": 1.8488696190771136e-05,
      "loss": 0.2192,
      "step": 2090
    },
    {
      "epoch": 1.9213174748398902,
      "grad_norm": 4.947459697723389,
      "learning_rate": 1.8333849489005884e-05,
      "loss": 0.2278,
      "step": 2100
    },
    {
      "epoch": 1.930466605672461,
      "grad_norm": 4.750110626220703,
      "learning_rate": 1.8179002787240633e-05,
      "loss": 0.2185,
      "step": 2110
    },
    {
      "epoch": 1.939615736505032,
      "grad_norm": 4.515120506286621,
      "learning_rate": 1.802415608547538e-05,
      "loss": 0.2045,
      "step": 2120
    },
    {
      "epoch": 1.9487648673376028,
      "grad_norm": 4.280106067657471,
      "learning_rate": 1.786930938371013e-05,
      "loss": 0.2051,
      "step": 2130
    },
    {
      "epoch": 1.9579139981701739,
      "grad_norm": 4.002866268157959,
      "learning_rate": 1.7714462681944878e-05,
      "loss": 0.2301,
      "step": 2140
    },
    {
      "epoch": 1.9670631290027447,
      "grad_norm": 4.178459644317627,
      "learning_rate": 1.7559615980179623e-05,
      "loss": 0.2202,
      "step": 2150
    },
    {
      "epoch": 1.9762122598353158,
      "grad_norm": 6.406257629394531,
      "learning_rate": 1.740476927841437e-05,
      "loss": 0.225,
      "step": 2160
    },
    {
      "epoch": 1.9853613906678866,
      "grad_norm": 4.606039524078369,
      "learning_rate": 1.724992257664912e-05,
      "loss": 0.2446,
      "step": 2170
    },
    {
      "epoch": 1.9945105215004575,
      "grad_norm": 4.238482475280762,
      "learning_rate": 1.7095075874883865e-05,
      "loss": 0.2524,
      "step": 2180
    },
    {
      "epoch": 2.0036596523330283,
      "grad_norm": 3.64787220954895,
      "learning_rate": 1.6940229173118613e-05,
      "loss": 0.218,
      "step": 2190
    },
    {
      "epoch": 2.012808783165599,
      "grad_norm": 3.7717037200927734,
      "learning_rate": 1.6785382471353362e-05,
      "loss": 0.1669,
      "step": 2200
    },
    {
      "epoch": 2.02195791399817,
      "grad_norm": 2.716965675354004,
      "learning_rate": 1.663053576958811e-05,
      "loss": 0.1806,
      "step": 2210
    },
    {
      "epoch": 2.031107044830741,
      "grad_norm": 4.059733867645264,
      "learning_rate": 1.6475689067822855e-05,
      "loss": 0.186,
      "step": 2220
    },
    {
      "epoch": 2.040256175663312,
      "grad_norm": 4.125363349914551,
      "learning_rate": 1.6320842366057604e-05,
      "loss": 0.1872,
      "step": 2230
    },
    {
      "epoch": 2.049405306495883,
      "grad_norm": 3.4910032749176025,
      "learning_rate": 1.6165995664292352e-05,
      "loss": 0.1519,
      "step": 2240
    },
    {
      "epoch": 2.058554437328454,
      "grad_norm": 2.8993113040924072,
      "learning_rate": 1.6011148962527097e-05,
      "loss": 0.1744,
      "step": 2250
    },
    {
      "epoch": 2.0677035681610247,
      "grad_norm": 4.730359077453613,
      "learning_rate": 1.5856302260761845e-05,
      "loss": 0.1627,
      "step": 2260
    },
    {
      "epoch": 2.0768526989935956,
      "grad_norm": 5.577477931976318,
      "learning_rate": 1.5701455558996594e-05,
      "loss": 0.1753,
      "step": 2270
    },
    {
      "epoch": 2.0860018298261664,
      "grad_norm": 4.823721885681152,
      "learning_rate": 1.5546608857231342e-05,
      "loss": 0.1821,
      "step": 2280
    },
    {
      "epoch": 2.0951509606587373,
      "grad_norm": 3.287593364715576,
      "learning_rate": 1.5391762155466087e-05,
      "loss": 0.1573,
      "step": 2290
    },
    {
      "epoch": 2.104300091491308,
      "grad_norm": 5.850045204162598,
      "learning_rate": 1.5236915453700837e-05,
      "loss": 0.1786,
      "step": 2300
    },
    {
      "epoch": 2.1134492223238794,
      "grad_norm": 4.332837104797363,
      "learning_rate": 1.5082068751935586e-05,
      "loss": 0.1884,
      "step": 2310
    },
    {
      "epoch": 2.1225983531564503,
      "grad_norm": 3.266853094100952,
      "learning_rate": 1.4927222050170331e-05,
      "loss": 0.1589,
      "step": 2320
    },
    {
      "epoch": 2.131747483989021,
      "grad_norm": 3.742208242416382,
      "learning_rate": 1.477237534840508e-05,
      "loss": 0.1691,
      "step": 2330
    },
    {
      "epoch": 2.140896614821592,
      "grad_norm": 4.098796844482422,
      "learning_rate": 1.4617528646639828e-05,
      "loss": 0.1622,
      "step": 2340
    },
    {
      "epoch": 2.150045745654163,
      "grad_norm": 5.091181755065918,
      "learning_rate": 1.4462681944874576e-05,
      "loss": 0.185,
      "step": 2350
    },
    {
      "epoch": 2.1591948764867337,
      "grad_norm": 3.2334043979644775,
      "learning_rate": 1.4307835243109321e-05,
      "loss": 0.1573,
      "step": 2360
    },
    {
      "epoch": 2.1683440073193045,
      "grad_norm": 3.941044569015503,
      "learning_rate": 1.415298854134407e-05,
      "loss": 0.17,
      "step": 2370
    },
    {
      "epoch": 2.1774931381518754,
      "grad_norm": 4.680139541625977,
      "learning_rate": 1.3998141839578818e-05,
      "loss": 0.1652,
      "step": 2380
    },
    {
      "epoch": 2.1866422689844462,
      "grad_norm": 3.671124219894409,
      "learning_rate": 1.3843295137813565e-05,
      "loss": 0.1637,
      "step": 2390
    },
    {
      "epoch": 2.1957913998170175,
      "grad_norm": 3.4199767112731934,
      "learning_rate": 1.3688448436048313e-05,
      "loss": 0.1822,
      "step": 2400
    },
    {
      "epoch": 2.2049405306495884,
      "grad_norm": 4.193777084350586,
      "learning_rate": 1.353360173428306e-05,
      "loss": 0.1724,
      "step": 2410
    },
    {
      "epoch": 2.2140896614821592,
      "grad_norm": 3.5047738552093506,
      "learning_rate": 1.3378755032517808e-05,
      "loss": 0.1661,
      "step": 2420
    },
    {
      "epoch": 2.22323879231473,
      "grad_norm": 4.056273460388184,
      "learning_rate": 1.3223908330752555e-05,
      "loss": 0.1476,
      "step": 2430
    },
    {
      "epoch": 2.232387923147301,
      "grad_norm": 5.089756488800049,
      "learning_rate": 1.3069061628987303e-05,
      "loss": 0.1785,
      "step": 2440
    },
    {
      "epoch": 2.241537053979872,
      "grad_norm": 3.5870766639709473,
      "learning_rate": 1.2914214927222052e-05,
      "loss": 0.1835,
      "step": 2450
    },
    {
      "epoch": 2.2506861848124426,
      "grad_norm": 3.9031713008880615,
      "learning_rate": 1.2759368225456797e-05,
      "loss": 0.1725,
      "step": 2460
    },
    {
      "epoch": 2.259835315645014,
      "grad_norm": 4.2854437828063965,
      "learning_rate": 1.2604521523691545e-05,
      "loss": 0.1774,
      "step": 2470
    },
    {
      "epoch": 2.268984446477585,
      "grad_norm": 4.6277756690979,
      "learning_rate": 1.2449674821926294e-05,
      "loss": 0.1583,
      "step": 2480
    },
    {
      "epoch": 2.2781335773101556,
      "grad_norm": 5.180362224578857,
      "learning_rate": 1.229482812016104e-05,
      "loss": 0.1636,
      "step": 2490
    },
    {
      "epoch": 2.2872827081427265,
      "grad_norm": 2.9935238361358643,
      "learning_rate": 1.2139981418395789e-05,
      "loss": 0.1486,
      "step": 2500
    },
    {
      "epoch": 2.2872827081427265,
      "eval_loss": 0.2814071476459503,
      "eval_runtime": 424.1932,
      "eval_samples_per_second": 1.818,
      "eval_steps_per_second": 0.116,
      "eval_wer": 0.7055808105232615,
      "step": 2500
    },
    {
      "epoch": 2.2964318389752973,
      "grad_norm": 3.3283779621124268,
      "learning_rate": 1.1985134716630536e-05,
      "loss": 0.158,
      "step": 2510
    },
    {
      "epoch": 2.305580969807868,
      "grad_norm": 4.186689376831055,
      "learning_rate": 1.1830288014865284e-05,
      "loss": 0.1598,
      "step": 2520
    },
    {
      "epoch": 2.314730100640439,
      "grad_norm": 3.5572612285614014,
      "learning_rate": 1.167544131310003e-05,
      "loss": 0.1492,
      "step": 2530
    },
    {
      "epoch": 2.32387923147301,
      "grad_norm": 2.8076884746551514,
      "learning_rate": 1.1520594611334779e-05,
      "loss": 0.1637,
      "step": 2540
    },
    {
      "epoch": 2.3330283623055807,
      "grad_norm": 4.602914810180664,
      "learning_rate": 1.1365747909569528e-05,
      "loss": 0.1526,
      "step": 2550
    },
    {
      "epoch": 2.342177493138152,
      "grad_norm": 2.5850772857666016,
      "learning_rate": 1.1210901207804274e-05,
      "loss": 0.159,
      "step": 2560
    },
    {
      "epoch": 2.351326623970723,
      "grad_norm": 5.045381546020508,
      "learning_rate": 1.1056054506039023e-05,
      "loss": 0.1503,
      "step": 2570
    },
    {
      "epoch": 2.3604757548032937,
      "grad_norm": 4.628170967102051,
      "learning_rate": 1.090120780427377e-05,
      "loss": 0.159,
      "step": 2580
    },
    {
      "epoch": 2.3696248856358646,
      "grad_norm": 3.4683902263641357,
      "learning_rate": 1.0746361102508518e-05,
      "loss": 0.1613,
      "step": 2590
    },
    {
      "epoch": 2.3787740164684354,
      "grad_norm": 4.1546525955200195,
      "learning_rate": 1.0591514400743265e-05,
      "loss": 0.1482,
      "step": 2600
    },
    {
      "epoch": 2.3879231473010063,
      "grad_norm": 5.595340251922607,
      "learning_rate": 1.0436667698978013e-05,
      "loss": 0.1654,
      "step": 2610
    },
    {
      "epoch": 2.397072278133577,
      "grad_norm": 4.809768199920654,
      "learning_rate": 1.028182099721276e-05,
      "loss": 0.1457,
      "step": 2620
    },
    {
      "epoch": 2.4062214089661484,
      "grad_norm": 3.541982889175415,
      "learning_rate": 1.0126974295447506e-05,
      "loss": 0.163,
      "step": 2630
    },
    {
      "epoch": 2.4153705397987193,
      "grad_norm": 5.883151054382324,
      "learning_rate": 9.972127593682255e-06,
      "loss": 0.1761,
      "step": 2640
    },
    {
      "epoch": 2.42451967063129,
      "grad_norm": 4.718671798706055,
      "learning_rate": 9.817280891917002e-06,
      "loss": 0.1562,
      "step": 2650
    },
    {
      "epoch": 2.433668801463861,
      "grad_norm": 3.135131597518921,
      "learning_rate": 9.66243419015175e-06,
      "loss": 0.1669,
      "step": 2660
    },
    {
      "epoch": 2.442817932296432,
      "grad_norm": 5.202821254730225,
      "learning_rate": 9.507587488386498e-06,
      "loss": 0.1748,
      "step": 2670
    },
    {
      "epoch": 2.4519670631290027,
      "grad_norm": 5.344453811645508,
      "learning_rate": 9.352740786621247e-06,
      "loss": 0.1641,
      "step": 2680
    },
    {
      "epoch": 2.4611161939615736,
      "grad_norm": 3.3761284351348877,
      "learning_rate": 9.197894084855993e-06,
      "loss": 0.1574,
      "step": 2690
    },
    {
      "epoch": 2.4702653247941444,
      "grad_norm": 5.866576671600342,
      "learning_rate": 9.04304738309074e-06,
      "loss": 0.1621,
      "step": 2700
    },
    {
      "epoch": 2.4794144556267153,
      "grad_norm": 4.291085720062256,
      "learning_rate": 8.888200681325489e-06,
      "loss": 0.1614,
      "step": 2710
    },
    {
      "epoch": 2.4885635864592865,
      "grad_norm": 2.837286949157715,
      "learning_rate": 8.733353979560235e-06,
      "loss": 0.1654,
      "step": 2720
    },
    {
      "epoch": 2.4977127172918574,
      "grad_norm": 3.848227024078369,
      "learning_rate": 8.578507277794984e-06,
      "loss": 0.1578,
      "step": 2730
    },
    {
      "epoch": 2.5068618481244282,
      "grad_norm": 3.820240020751953,
      "learning_rate": 8.42366057602973e-06,
      "loss": 0.1627,
      "step": 2740
    },
    {
      "epoch": 2.516010978956999,
      "grad_norm": 3.1845788955688477,
      "learning_rate": 8.268813874264479e-06,
      "loss": 0.1718,
      "step": 2750
    },
    {
      "epoch": 2.52516010978957,
      "grad_norm": 4.4272236824035645,
      "learning_rate": 8.113967172499226e-06,
      "loss": 0.1624,
      "step": 2760
    },
    {
      "epoch": 2.534309240622141,
      "grad_norm": 3.211336374282837,
      "learning_rate": 7.959120470733972e-06,
      "loss": 0.1571,
      "step": 2770
    },
    {
      "epoch": 2.5434583714547117,
      "grad_norm": 3.920867443084717,
      "learning_rate": 7.80427376896872e-06,
      "loss": 0.1499,
      "step": 2780
    },
    {
      "epoch": 2.552607502287283,
      "grad_norm": 5.123950481414795,
      "learning_rate": 7.64942706720347e-06,
      "loss": 0.1475,
      "step": 2790
    },
    {
      "epoch": 2.5617566331198534,
      "grad_norm": 3.7110486030578613,
      "learning_rate": 7.494580365438217e-06,
      "loss": 0.1552,
      "step": 2800
    },
    {
      "epoch": 2.5709057639524246,
      "grad_norm": 4.068341255187988,
      "learning_rate": 7.3397336636729635e-06,
      "loss": 0.1494,
      "step": 2810
    },
    {
      "epoch": 2.5800548947849955,
      "grad_norm": 4.653831958770752,
      "learning_rate": 7.184886961907712e-06,
      "loss": 0.161,
      "step": 2820
    },
    {
      "epoch": 2.5892040256175664,
      "grad_norm": 3.5324552059173584,
      "learning_rate": 7.0300402601424595e-06,
      "loss": 0.16,
      "step": 2830
    },
    {
      "epoch": 2.598353156450137,
      "grad_norm": 5.100922107696533,
      "learning_rate": 6.875193558377208e-06,
      "loss": 0.1549,
      "step": 2840
    },
    {
      "epoch": 2.607502287282708,
      "grad_norm": 3.772149085998535,
      "learning_rate": 6.720346856611955e-06,
      "loss": 0.1613,
      "step": 2850
    },
    {
      "epoch": 2.616651418115279,
      "grad_norm": 4.288483619689941,
      "learning_rate": 6.565500154846701e-06,
      "loss": 0.1605,
      "step": 2860
    },
    {
      "epoch": 2.6258005489478498,
      "grad_norm": 3.9227993488311768,
      "learning_rate": 6.41065345308145e-06,
      "loss": 0.1538,
      "step": 2870
    },
    {
      "epoch": 2.634949679780421,
      "grad_norm": 3.3688392639160156,
      "learning_rate": 6.255806751316197e-06,
      "loss": 0.173,
      "step": 2880
    },
    {
      "epoch": 2.644098810612992,
      "grad_norm": 3.6099278926849365,
      "learning_rate": 6.100960049550945e-06,
      "loss": 0.1739,
      "step": 2890
    },
    {
      "epoch": 2.6532479414455628,
      "grad_norm": 3.802189826965332,
      "learning_rate": 5.9461133477856925e-06,
      "loss": 0.1506,
      "step": 2900
    },
    {
      "epoch": 2.6623970722781336,
      "grad_norm": 3.382754325866699,
      "learning_rate": 5.79126664602044e-06,
      "loss": 0.1701,
      "step": 2910
    },
    {
      "epoch": 2.6715462031107045,
      "grad_norm": 3.056814193725586,
      "learning_rate": 5.636419944255188e-06,
      "loss": 0.1638,
      "step": 2920
    },
    {
      "epoch": 2.6806953339432753,
      "grad_norm": 3.345564842224121,
      "learning_rate": 5.481573242489935e-06,
      "loss": 0.1318,
      "step": 2930
    },
    {
      "epoch": 2.689844464775846,
      "grad_norm": 3.740990400314331,
      "learning_rate": 5.326726540724683e-06,
      "loss": 0.1611,
      "step": 2940
    },
    {
      "epoch": 2.6989935956084175,
      "grad_norm": 2.8473143577575684,
      "learning_rate": 5.17187983895943e-06,
      "loss": 0.1684,
      "step": 2950
    },
    {
      "epoch": 2.708142726440988,
      "grad_norm": 2.8555662631988525,
      "learning_rate": 5.017033137194178e-06,
      "loss": 0.1883,
      "step": 2960
    },
    {
      "epoch": 2.717291857273559,
      "grad_norm": 4.181397438049316,
      "learning_rate": 4.8621864354289254e-06,
      "loss": 0.1677,
      "step": 2970
    },
    {
      "epoch": 2.72644098810613,
      "grad_norm": 4.9955949783325195,
      "learning_rate": 4.707339733663673e-06,
      "loss": 0.1724,
      "step": 2980
    },
    {
      "epoch": 2.735590118938701,
      "grad_norm": 3.999300956726074,
      "learning_rate": 4.552493031898421e-06,
      "loss": 0.1433,
      "step": 2990
    },
    {
      "epoch": 2.7447392497712717,
      "grad_norm": 3.054906129837036,
      "learning_rate": 4.397646330133168e-06,
      "loss": 0.1796,
      "step": 3000
    },
    {
      "epoch": 2.7447392497712717,
      "eval_loss": 0.2692735195159912,
      "eval_runtime": 414.8696,
      "eval_samples_per_second": 1.858,
      "eval_steps_per_second": 0.118,
      "eval_wer": 0.6916529645033369,
      "step": 3000
    }
  ],
  "logging_steps": 10,
  "max_steps": 3279,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.18116487028736e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}