tuanio's picture
End of training
a1f4e7a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 61.0,
"eval_steps": 2500,
"global_step": 29829,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 3.21823667448877e-07,
"loss": 17.0035,
"step": 50
},
{
"epoch": 0.2,
"learning_rate": 6.503519946362723e-07,
"loss": 17.0224,
"step": 100
},
{
"epoch": 0.31,
"learning_rate": 9.85584981562186e-07,
"loss": 16.4754,
"step": 150
},
{
"epoch": 0.41,
"learning_rate": 1.3208179684880992e-06,
"loss": 16.8572,
"step": 200
},
{
"epoch": 0.51,
"learning_rate": 1.6560509554140127e-06,
"loss": 16.3479,
"step": 250
},
{
"epoch": 0.61,
"learning_rate": 1.9912839423399265e-06,
"loss": 15.4566,
"step": 300
},
{
"epoch": 0.72,
"learning_rate": 2.32651692926584e-06,
"loss": 15.0735,
"step": 350
},
{
"epoch": 0.82,
"learning_rate": 2.6617499161917535e-06,
"loss": 13.7507,
"step": 400
},
{
"epoch": 0.92,
"learning_rate": 2.9969829031176675e-06,
"loss": 12.5598,
"step": 450
},
{
"epoch": 1.02,
"learning_rate": 3.332215890043581e-06,
"loss": 11.5457,
"step": 500
},
{
"epoch": 1.12,
"learning_rate": 3.6674488769694945e-06,
"loss": 10.3925,
"step": 550
},
{
"epoch": 1.23,
"learning_rate": 4.002681863895408e-06,
"loss": 9.9256,
"step": 600
},
{
"epoch": 1.33,
"learning_rate": 4.337914850821321e-06,
"loss": 9.0534,
"step": 650
},
{
"epoch": 1.43,
"learning_rate": 4.673147837747235e-06,
"loss": 8.5178,
"step": 700
},
{
"epoch": 1.53,
"learning_rate": 5.008380824673148e-06,
"loss": 8.075,
"step": 750
},
{
"epoch": 1.64,
"learning_rate": 5.343613811599062e-06,
"loss": 7.6319,
"step": 800
},
{
"epoch": 1.74,
"learning_rate": 5.678846798524975e-06,
"loss": 7.133,
"step": 850
},
{
"epoch": 1.84,
"learning_rate": 6.01407978545089e-06,
"loss": 6.7126,
"step": 900
},
{
"epoch": 1.94,
"learning_rate": 6.349312772376802e-06,
"loss": 6.3321,
"step": 950
},
{
"epoch": 2.04,
"learning_rate": 6.684545759302717e-06,
"loss": 6.1839,
"step": 1000
},
{
"epoch": 2.15,
"learning_rate": 7.019778746228629e-06,
"loss": 5.7829,
"step": 1050
},
{
"epoch": 2.25,
"learning_rate": 7.355011733154544e-06,
"loss": 5.5009,
"step": 1100
},
{
"epoch": 2.35,
"learning_rate": 7.690244720080457e-06,
"loss": 5.273,
"step": 1150
},
{
"epoch": 2.45,
"learning_rate": 8.02547770700637e-06,
"loss": 4.9947,
"step": 1200
},
{
"epoch": 2.56,
"learning_rate": 8.360710693932284e-06,
"loss": 4.7348,
"step": 1250
},
{
"epoch": 2.66,
"learning_rate": 8.695943680858197e-06,
"loss": 4.4955,
"step": 1300
},
{
"epoch": 2.76,
"learning_rate": 9.031176667784111e-06,
"loss": 4.4041,
"step": 1350
},
{
"epoch": 2.86,
"learning_rate": 9.366409654710024e-06,
"loss": 4.2109,
"step": 1400
},
{
"epoch": 2.97,
"learning_rate": 9.701642641635938e-06,
"loss": 4.0628,
"step": 1450
},
{
"epoch": 3.07,
"learning_rate": 1.0036875628561851e-05,
"loss": 3.9396,
"step": 1500
},
{
"epoch": 3.17,
"learning_rate": 1.0372108615487765e-05,
"loss": 3.8049,
"step": 1550
},
{
"epoch": 3.27,
"learning_rate": 1.070734160241368e-05,
"loss": 3.7435,
"step": 1600
},
{
"epoch": 3.37,
"learning_rate": 1.1042574589339592e-05,
"loss": 3.6665,
"step": 1650
},
{
"epoch": 3.48,
"learning_rate": 1.1377807576265505e-05,
"loss": 3.6305,
"step": 1700
},
{
"epoch": 3.58,
"learning_rate": 1.1713040563191418e-05,
"loss": 3.5934,
"step": 1750
},
{
"epoch": 3.68,
"learning_rate": 1.2048273550117334e-05,
"loss": 3.5484,
"step": 1800
},
{
"epoch": 3.78,
"learning_rate": 1.2383506537043247e-05,
"loss": 3.528,
"step": 1850
},
{
"epoch": 3.89,
"learning_rate": 1.271873952396916e-05,
"loss": 3.5137,
"step": 1900
},
{
"epoch": 3.99,
"learning_rate": 1.3053972510895072e-05,
"loss": 3.4936,
"step": 1950
},
{
"epoch": 4.09,
"learning_rate": 1.3389205497820988e-05,
"loss": 3.4889,
"step": 2000
},
{
"epoch": 4.19,
"learning_rate": 1.37244384847469e-05,
"loss": 3.475,
"step": 2050
},
{
"epoch": 4.29,
"learning_rate": 1.4059671471672813e-05,
"loss": 3.4778,
"step": 2100
},
{
"epoch": 4.4,
"learning_rate": 1.4394904458598726e-05,
"loss": 3.4733,
"step": 2150
},
{
"epoch": 4.5,
"learning_rate": 1.4730137445524642e-05,
"loss": 3.4635,
"step": 2200
},
{
"epoch": 4.6,
"learning_rate": 1.5065370432450555e-05,
"loss": 3.4669,
"step": 2250
},
{
"epoch": 4.7,
"learning_rate": 1.540060341937647e-05,
"loss": 3.4541,
"step": 2300
},
{
"epoch": 4.81,
"learning_rate": 1.573583640630238e-05,
"loss": 3.4526,
"step": 2350
},
{
"epoch": 4.91,
"learning_rate": 1.6071069393228294e-05,
"loss": 3.4508,
"step": 2400
},
{
"epoch": 5.01,
"learning_rate": 1.640630238015421e-05,
"loss": 3.4412,
"step": 2450
},
{
"epoch": 5.11,
"learning_rate": 1.6741535367080123e-05,
"loss": 3.4352,
"step": 2500
},
{
"epoch": 5.11,
"eval_loss": 3.64640212059021,
"eval_runtime": 6.2703,
"eval_samples_per_second": 226.463,
"eval_steps_per_second": 14.194,
"eval_wer": 1.0,
"step": 2500
},
{
"epoch": 5.21,
"learning_rate": 1.7076768354006034e-05,
"loss": 3.4393,
"step": 2550
},
{
"epoch": 5.32,
"learning_rate": 1.741200134093195e-05,
"loss": 3.4385,
"step": 2600
},
{
"epoch": 5.42,
"learning_rate": 1.7747234327857863e-05,
"loss": 3.4236,
"step": 2650
},
{
"epoch": 5.52,
"learning_rate": 1.8082467314783777e-05,
"loss": 3.4318,
"step": 2700
},
{
"epoch": 5.62,
"learning_rate": 1.8417700301709688e-05,
"loss": 3.402,
"step": 2750
},
{
"epoch": 5.73,
"learning_rate": 1.8752933288635603e-05,
"loss": 3.3865,
"step": 2800
},
{
"epoch": 5.83,
"learning_rate": 1.9088166275561517e-05,
"loss": 3.3609,
"step": 2850
},
{
"epoch": 5.93,
"learning_rate": 1.942339926248743e-05,
"loss": 3.3346,
"step": 2900
},
{
"epoch": 6.03,
"learning_rate": 1.9758632249413342e-05,
"loss": 3.2911,
"step": 2950
},
{
"epoch": 6.13,
"learning_rate": 1.99895701408031e-05,
"loss": 3.2645,
"step": 3000
},
{
"epoch": 6.24,
"learning_rate": 1.995232064367131e-05,
"loss": 3.2486,
"step": 3050
},
{
"epoch": 6.34,
"learning_rate": 1.9915071146539525e-05,
"loss": 3.2108,
"step": 3100
},
{
"epoch": 6.44,
"learning_rate": 1.9877821649407735e-05,
"loss": 3.1756,
"step": 3150
},
{
"epoch": 6.54,
"learning_rate": 1.9840572152275946e-05,
"loss": 3.1475,
"step": 3200
},
{
"epoch": 6.65,
"learning_rate": 1.9803322655144156e-05,
"loss": 3.1179,
"step": 3250
},
{
"epoch": 6.75,
"learning_rate": 1.976607315801237e-05,
"loss": 3.0927,
"step": 3300
},
{
"epoch": 6.85,
"learning_rate": 1.972882366088058e-05,
"loss": 3.0755,
"step": 3350
},
{
"epoch": 6.95,
"learning_rate": 1.969157416374879e-05,
"loss": 3.0373,
"step": 3400
},
{
"epoch": 7.06,
"learning_rate": 1.9654324666617e-05,
"loss": 3.0497,
"step": 3450
},
{
"epoch": 7.16,
"learning_rate": 1.9617075169485215e-05,
"loss": 3.0105,
"step": 3500
},
{
"epoch": 7.26,
"learning_rate": 1.9579825672353425e-05,
"loss": 3.013,
"step": 3550
},
{
"epoch": 7.36,
"learning_rate": 1.9542576175221636e-05,
"loss": 2.9853,
"step": 3600
},
{
"epoch": 7.46,
"learning_rate": 1.9505326678089846e-05,
"loss": 2.9777,
"step": 3650
},
{
"epoch": 7.57,
"learning_rate": 1.9468077180958057e-05,
"loss": 2.9633,
"step": 3700
},
{
"epoch": 7.67,
"learning_rate": 1.943082768382627e-05,
"loss": 2.9296,
"step": 3750
},
{
"epoch": 7.77,
"learning_rate": 1.939357818669448e-05,
"loss": 2.9084,
"step": 3800
},
{
"epoch": 7.87,
"learning_rate": 1.9356328689562695e-05,
"loss": 2.9028,
"step": 3850
},
{
"epoch": 7.98,
"learning_rate": 1.9319079192430902e-05,
"loss": 2.9068,
"step": 3900
},
{
"epoch": 8.08,
"learning_rate": 1.9281829695299116e-05,
"loss": 2.8757,
"step": 3950
},
{
"epoch": 8.18,
"learning_rate": 1.9244580198167326e-05,
"loss": 2.8462,
"step": 4000
},
{
"epoch": 8.28,
"learning_rate": 1.9207330701035537e-05,
"loss": 2.7924,
"step": 4050
},
{
"epoch": 8.38,
"learning_rate": 1.917008120390375e-05,
"loss": 2.7197,
"step": 4100
},
{
"epoch": 8.49,
"learning_rate": 1.913283170677196e-05,
"loss": 2.6297,
"step": 4150
},
{
"epoch": 8.59,
"learning_rate": 1.909558220964017e-05,
"loss": 2.596,
"step": 4200
},
{
"epoch": 8.69,
"learning_rate": 1.9058332712508382e-05,
"loss": 2.5293,
"step": 4250
},
{
"epoch": 8.79,
"learning_rate": 1.9021083215376596e-05,
"loss": 2.4852,
"step": 4300
},
{
"epoch": 8.9,
"learning_rate": 1.8983833718244806e-05,
"loss": 2.4593,
"step": 4350
},
{
"epoch": 9.0,
"learning_rate": 1.8946584221113016e-05,
"loss": 2.4033,
"step": 4400
},
{
"epoch": 9.1,
"learning_rate": 1.8909334723981227e-05,
"loss": 2.3753,
"step": 4450
},
{
"epoch": 9.2,
"learning_rate": 1.887208522684944e-05,
"loss": 2.3198,
"step": 4500
},
{
"epoch": 9.3,
"learning_rate": 1.883483572971765e-05,
"loss": 2.2542,
"step": 4550
},
{
"epoch": 9.41,
"learning_rate": 1.879758623258586e-05,
"loss": 2.2424,
"step": 4600
},
{
"epoch": 9.51,
"learning_rate": 1.8760336735454072e-05,
"loss": 2.1776,
"step": 4650
},
{
"epoch": 9.61,
"learning_rate": 1.8723087238322283e-05,
"loss": 2.1134,
"step": 4700
},
{
"epoch": 9.71,
"learning_rate": 1.8685837741190496e-05,
"loss": 2.1362,
"step": 4750
},
{
"epoch": 9.82,
"learning_rate": 1.8648588244058707e-05,
"loss": 2.0832,
"step": 4800
},
{
"epoch": 9.92,
"learning_rate": 1.861133874692692e-05,
"loss": 1.9921,
"step": 4850
},
{
"epoch": 10.02,
"learning_rate": 1.8574089249795128e-05,
"loss": 1.9646,
"step": 4900
},
{
"epoch": 10.12,
"learning_rate": 1.853683975266334e-05,
"loss": 1.9432,
"step": 4950
},
{
"epoch": 10.22,
"learning_rate": 1.8499590255531552e-05,
"loss": 1.9084,
"step": 5000
},
{
"epoch": 10.22,
"eval_loss": 1.7449651956558228,
"eval_runtime": 5.5709,
"eval_samples_per_second": 254.895,
"eval_steps_per_second": 15.976,
"eval_wer": 0.9274757193428338,
"step": 5000
},
{
"epoch": 10.33,
"learning_rate": 1.8462340758399762e-05,
"loss": 1.9073,
"step": 5050
},
{
"epoch": 10.43,
"learning_rate": 1.8425091261267973e-05,
"loss": 1.8539,
"step": 5100
},
{
"epoch": 10.53,
"learning_rate": 1.8387841764136187e-05,
"loss": 1.8199,
"step": 5150
},
{
"epoch": 10.63,
"learning_rate": 1.8350592267004397e-05,
"loss": 1.792,
"step": 5200
},
{
"epoch": 10.74,
"learning_rate": 1.8313342769872608e-05,
"loss": 1.7961,
"step": 5250
},
{
"epoch": 10.84,
"learning_rate": 1.827609327274082e-05,
"loss": 1.7536,
"step": 5300
},
{
"epoch": 10.94,
"learning_rate": 1.823884377560903e-05,
"loss": 1.7679,
"step": 5350
},
{
"epoch": 11.04,
"learning_rate": 1.8201594278477242e-05,
"loss": 1.6964,
"step": 5400
},
{
"epoch": 11.15,
"learning_rate": 1.816508977128809e-05,
"loss": 1.6543,
"step": 5450
},
{
"epoch": 11.25,
"learning_rate": 1.81278402741563e-05,
"loss": 1.6273,
"step": 5500
},
{
"epoch": 11.35,
"learning_rate": 1.8090590777024514e-05,
"loss": 1.6645,
"step": 5550
},
{
"epoch": 11.45,
"learning_rate": 1.805334127989272e-05,
"loss": 1.6498,
"step": 5600
},
{
"epoch": 11.55,
"learning_rate": 1.8016091782760935e-05,
"loss": 1.5384,
"step": 5650
},
{
"epoch": 11.66,
"learning_rate": 1.7978842285629145e-05,
"loss": 1.4915,
"step": 5700
},
{
"epoch": 11.76,
"learning_rate": 1.7941592788497356e-05,
"loss": 1.5428,
"step": 5750
},
{
"epoch": 11.86,
"learning_rate": 1.790434329136557e-05,
"loss": 1.5247,
"step": 5800
},
{
"epoch": 11.96,
"learning_rate": 1.786709379423378e-05,
"loss": 1.4698,
"step": 5850
},
{
"epoch": 12.07,
"learning_rate": 1.782984429710199e-05,
"loss": 1.4546,
"step": 5900
},
{
"epoch": 12.17,
"learning_rate": 1.77925947999702e-05,
"loss": 1.4295,
"step": 5950
},
{
"epoch": 12.27,
"learning_rate": 1.7755345302838415e-05,
"loss": 1.4489,
"step": 6000
},
{
"epoch": 12.37,
"learning_rate": 1.7718095805706625e-05,
"loss": 1.4124,
"step": 6050
},
{
"epoch": 12.47,
"learning_rate": 1.7680846308574836e-05,
"loss": 1.3897,
"step": 6100
},
{
"epoch": 12.58,
"learning_rate": 1.7643596811443046e-05,
"loss": 1.4136,
"step": 6150
},
{
"epoch": 12.68,
"learning_rate": 1.760634731431126e-05,
"loss": 1.3523,
"step": 6200
},
{
"epoch": 12.78,
"learning_rate": 1.756909781717947e-05,
"loss": 1.3617,
"step": 6250
},
{
"epoch": 12.88,
"learning_rate": 1.753184832004768e-05,
"loss": 1.3284,
"step": 6300
},
{
"epoch": 12.99,
"learning_rate": 1.749459882291589e-05,
"loss": 1.2548,
"step": 6350
},
{
"epoch": 13.09,
"learning_rate": 1.74573493257841e-05,
"loss": 1.3333,
"step": 6400
},
{
"epoch": 13.19,
"learning_rate": 1.7420099828652315e-05,
"loss": 1.2871,
"step": 6450
},
{
"epoch": 13.29,
"learning_rate": 1.7382850331520526e-05,
"loss": 1.2008,
"step": 6500
},
{
"epoch": 13.39,
"learning_rate": 1.7345600834388736e-05,
"loss": 1.235,
"step": 6550
},
{
"epoch": 13.5,
"learning_rate": 1.7308351337256947e-05,
"loss": 1.2407,
"step": 6600
},
{
"epoch": 13.6,
"learning_rate": 1.727110184012516e-05,
"loss": 1.2275,
"step": 6650
},
{
"epoch": 13.7,
"learning_rate": 1.723385234299337e-05,
"loss": 1.2474,
"step": 6700
},
{
"epoch": 13.8,
"learning_rate": 1.719660284586158e-05,
"loss": 1.2391,
"step": 6750
},
{
"epoch": 13.91,
"learning_rate": 1.7159353348729792e-05,
"loss": 1.2206,
"step": 6800
},
{
"epoch": 14.01,
"learning_rate": 1.7122103851598006e-05,
"loss": 1.178,
"step": 6850
},
{
"epoch": 14.11,
"learning_rate": 1.7084854354466216e-05,
"loss": 1.1396,
"step": 6900
},
{
"epoch": 14.21,
"learning_rate": 1.7047604857334427e-05,
"loss": 1.1398,
"step": 6950
},
{
"epoch": 14.31,
"learning_rate": 1.701035536020264e-05,
"loss": 1.1479,
"step": 7000
},
{
"epoch": 14.42,
"learning_rate": 1.6973105863070847e-05,
"loss": 1.1222,
"step": 7050
},
{
"epoch": 14.52,
"learning_rate": 1.693585636593906e-05,
"loss": 1.1401,
"step": 7100
},
{
"epoch": 14.62,
"learning_rate": 1.6898606868807272e-05,
"loss": 1.0867,
"step": 7150
},
{
"epoch": 14.72,
"learning_rate": 1.6861357371675486e-05,
"loss": 1.0785,
"step": 7200
},
{
"epoch": 14.83,
"learning_rate": 1.6824107874543696e-05,
"loss": 1.1391,
"step": 7250
},
{
"epoch": 14.93,
"learning_rate": 1.6786858377411906e-05,
"loss": 1.1081,
"step": 7300
},
{
"epoch": 15.03,
"learning_rate": 1.6749608880280117e-05,
"loss": 1.0664,
"step": 7350
},
{
"epoch": 15.13,
"learning_rate": 1.6712359383148327e-05,
"loss": 1.086,
"step": 7400
},
{
"epoch": 15.24,
"learning_rate": 1.667510988601654e-05,
"loss": 1.0667,
"step": 7450
},
{
"epoch": 15.34,
"learning_rate": 1.663860537882739e-05,
"loss": 1.0531,
"step": 7500
},
{
"epoch": 15.34,
"eval_loss": 1.1179660558700562,
"eval_runtime": 5.8029,
"eval_samples_per_second": 244.705,
"eval_steps_per_second": 15.337,
"eval_wer": 0.664609240265045,
"step": 7500
},
{
"epoch": 15.44,
"learning_rate": 1.66013558816956e-05,
"loss": 1.0252,
"step": 7550
},
{
"epoch": 15.54,
"learning_rate": 1.656410638456381e-05,
"loss": 1.0528,
"step": 7600
},
{
"epoch": 15.64,
"learning_rate": 1.6527601877374657e-05,
"loss": 1.0085,
"step": 7650
},
{
"epoch": 15.75,
"learning_rate": 1.6490352380242867e-05,
"loss": 1.0533,
"step": 7700
},
{
"epoch": 15.85,
"learning_rate": 1.6453102883111078e-05,
"loss": 0.9804,
"step": 7750
},
{
"epoch": 15.95,
"learning_rate": 1.641585338597929e-05,
"loss": 0.9825,
"step": 7800
},
{
"epoch": 16.05,
"learning_rate": 1.6378603888847502e-05,
"loss": 0.9582,
"step": 7850
},
{
"epoch": 16.16,
"learning_rate": 1.6341354391715713e-05,
"loss": 1.0105,
"step": 7900
},
{
"epoch": 16.26,
"learning_rate": 1.6304104894583926e-05,
"loss": 0.9968,
"step": 7950
},
{
"epoch": 16.36,
"learning_rate": 1.6266855397452133e-05,
"loss": 1.0165,
"step": 8000
},
{
"epoch": 16.46,
"learning_rate": 1.6229605900320347e-05,
"loss": 0.956,
"step": 8050
},
{
"epoch": 16.56,
"learning_rate": 1.6192356403188558e-05,
"loss": 0.9775,
"step": 8100
},
{
"epoch": 16.67,
"learning_rate": 1.615510690605677e-05,
"loss": 0.9906,
"step": 8150
},
{
"epoch": 16.77,
"learning_rate": 1.6117857408924982e-05,
"loss": 0.9799,
"step": 8200
},
{
"epoch": 16.87,
"learning_rate": 1.6080607911793192e-05,
"loss": 0.9543,
"step": 8250
},
{
"epoch": 16.97,
"learning_rate": 1.6043358414661403e-05,
"loss": 0.9317,
"step": 8300
},
{
"epoch": 17.08,
"learning_rate": 1.6006108917529613e-05,
"loss": 0.9944,
"step": 8350
},
{
"epoch": 17.18,
"learning_rate": 1.5968859420397827e-05,
"loss": 0.9331,
"step": 8400
},
{
"epoch": 17.28,
"learning_rate": 1.5931609923266038e-05,
"loss": 0.9199,
"step": 8450
},
{
"epoch": 17.38,
"learning_rate": 1.5894360426134248e-05,
"loss": 0.9498,
"step": 8500
},
{
"epoch": 17.48,
"learning_rate": 1.585711092900246e-05,
"loss": 0.9218,
"step": 8550
},
{
"epoch": 17.59,
"learning_rate": 1.5819861431870672e-05,
"loss": 0.9244,
"step": 8600
},
{
"epoch": 17.69,
"learning_rate": 1.5782611934738883e-05,
"loss": 0.909,
"step": 8650
},
{
"epoch": 17.79,
"learning_rate": 1.5745362437607093e-05,
"loss": 0.9292,
"step": 8700
},
{
"epoch": 17.89,
"learning_rate": 1.5708112940475304e-05,
"loss": 0.9367,
"step": 8750
},
{
"epoch": 18.0,
"learning_rate": 1.5670863443343517e-05,
"loss": 0.8606,
"step": 8800
},
{
"epoch": 18.1,
"learning_rate": 1.5633613946211728e-05,
"loss": 0.9227,
"step": 8850
},
{
"epoch": 18.2,
"learning_rate": 1.5596364449079938e-05,
"loss": 0.8956,
"step": 8900
},
{
"epoch": 18.3,
"learning_rate": 1.5559114951948152e-05,
"loss": 0.875,
"step": 8950
},
{
"epoch": 18.4,
"learning_rate": 1.5522610444758996e-05,
"loss": 0.9021,
"step": 9000
},
{
"epoch": 18.51,
"learning_rate": 1.5485360947627207e-05,
"loss": 0.8596,
"step": 9050
},
{
"epoch": 18.61,
"learning_rate": 1.544811145049542e-05,
"loss": 0.8488,
"step": 9100
},
{
"epoch": 18.71,
"learning_rate": 1.541086195336363e-05,
"loss": 0.8826,
"step": 9150
},
{
"epoch": 18.81,
"learning_rate": 1.537361245623184e-05,
"loss": 0.8713,
"step": 9200
},
{
"epoch": 18.92,
"learning_rate": 1.5336362959100052e-05,
"loss": 0.9108,
"step": 9250
},
{
"epoch": 19.02,
"learning_rate": 1.5299113461968266e-05,
"loss": 0.8792,
"step": 9300
},
{
"epoch": 19.12,
"learning_rate": 1.5261863964836476e-05,
"loss": 0.8527,
"step": 9350
},
{
"epoch": 19.22,
"learning_rate": 1.5224614467704688e-05,
"loss": 0.8418,
"step": 9400
},
{
"epoch": 19.33,
"learning_rate": 1.5187364970572897e-05,
"loss": 0.8594,
"step": 9450
},
{
"epoch": 19.43,
"learning_rate": 1.5150115473441109e-05,
"loss": 0.8778,
"step": 9500
},
{
"epoch": 19.53,
"learning_rate": 1.5112865976309321e-05,
"loss": 0.8213,
"step": 9550
},
{
"epoch": 19.63,
"learning_rate": 1.5075616479177533e-05,
"loss": 0.8362,
"step": 9600
},
{
"epoch": 19.73,
"learning_rate": 1.5038366982045744e-05,
"loss": 0.8534,
"step": 9650
},
{
"epoch": 19.84,
"learning_rate": 1.5001117484913954e-05,
"loss": 0.8341,
"step": 9700
},
{
"epoch": 19.94,
"learning_rate": 1.4963867987782166e-05,
"loss": 0.8463,
"step": 9750
},
{
"epoch": 20.04,
"learning_rate": 1.4926618490650377e-05,
"loss": 0.8203,
"step": 9800
},
{
"epoch": 20.14,
"learning_rate": 1.4889368993518589e-05,
"loss": 0.8103,
"step": 9850
},
{
"epoch": 20.25,
"learning_rate": 1.4852119496386801e-05,
"loss": 0.8198,
"step": 9900
},
{
"epoch": 20.35,
"learning_rate": 1.481486999925501e-05,
"loss": 0.833,
"step": 9950
},
{
"epoch": 20.45,
"learning_rate": 1.4777620502123222e-05,
"loss": 0.8217,
"step": 10000
},
{
"epoch": 20.45,
"eval_loss": 1.1415427923202515,
"eval_runtime": 5.7305,
"eval_samples_per_second": 247.797,
"eval_steps_per_second": 15.531,
"eval_wer": 0.6047925932649542,
"step": 10000
},
{
"epoch": 20.55,
"learning_rate": 1.4740371004991434e-05,
"loss": 0.8426,
"step": 10050
},
{
"epoch": 20.65,
"learning_rate": 1.4703121507859646e-05,
"loss": 0.78,
"step": 10100
},
{
"epoch": 20.76,
"learning_rate": 1.4665872010727857e-05,
"loss": 0.8446,
"step": 10150
},
{
"epoch": 20.86,
"learning_rate": 1.4628622513596067e-05,
"loss": 0.8481,
"step": 10200
},
{
"epoch": 20.96,
"learning_rate": 1.459137301646428e-05,
"loss": 0.8037,
"step": 10250
},
{
"epoch": 21.06,
"learning_rate": 1.455412351933249e-05,
"loss": 0.7794,
"step": 10300
},
{
"epoch": 21.17,
"learning_rate": 1.4516874022200702e-05,
"loss": 0.8355,
"step": 10350
},
{
"epoch": 21.27,
"learning_rate": 1.4479624525068914e-05,
"loss": 0.7945,
"step": 10400
},
{
"epoch": 21.37,
"learning_rate": 1.4442375027937123e-05,
"loss": 0.7716,
"step": 10450
},
{
"epoch": 21.47,
"learning_rate": 1.4405125530805335e-05,
"loss": 0.7793,
"step": 10500
},
{
"epoch": 21.57,
"learning_rate": 1.4367876033673547e-05,
"loss": 0.7672,
"step": 10550
},
{
"epoch": 21.68,
"learning_rate": 1.4330626536541759e-05,
"loss": 0.7617,
"step": 10600
},
{
"epoch": 21.78,
"learning_rate": 1.429337703940997e-05,
"loss": 0.7784,
"step": 10650
},
{
"epoch": 21.88,
"learning_rate": 1.425612754227818e-05,
"loss": 0.7942,
"step": 10700
},
{
"epoch": 21.98,
"learning_rate": 1.4218878045146392e-05,
"loss": 0.77,
"step": 10750
},
{
"epoch": 22.09,
"learning_rate": 1.4181628548014603e-05,
"loss": 0.7773,
"step": 10800
},
{
"epoch": 22.19,
"learning_rate": 1.4144379050882815e-05,
"loss": 0.7691,
"step": 10850
},
{
"epoch": 22.29,
"learning_rate": 1.4107129553751025e-05,
"loss": 0.7546,
"step": 10900
},
{
"epoch": 22.39,
"learning_rate": 1.4070625046561873e-05,
"loss": 0.7777,
"step": 10950
},
{
"epoch": 22.49,
"learning_rate": 1.4033375549430085e-05,
"loss": 0.7543,
"step": 11000
},
{
"epoch": 22.6,
"learning_rate": 1.3996126052298295e-05,
"loss": 0.7527,
"step": 11050
},
{
"epoch": 22.7,
"learning_rate": 1.3958876555166507e-05,
"loss": 0.7485,
"step": 11100
},
{
"epoch": 22.8,
"learning_rate": 1.3921627058034716e-05,
"loss": 0.7562,
"step": 11150
},
{
"epoch": 22.9,
"learning_rate": 1.3884377560902928e-05,
"loss": 0.74,
"step": 11200
},
{
"epoch": 23.01,
"learning_rate": 1.384712806377114e-05,
"loss": 0.7872,
"step": 11250
},
{
"epoch": 23.11,
"learning_rate": 1.3809878566639352e-05,
"loss": 0.7484,
"step": 11300
},
{
"epoch": 23.21,
"learning_rate": 1.3772629069507565e-05,
"loss": 0.7232,
"step": 11350
},
{
"epoch": 23.31,
"learning_rate": 1.3735379572375773e-05,
"loss": 0.7759,
"step": 11400
},
{
"epoch": 23.42,
"learning_rate": 1.3698130075243985e-05,
"loss": 0.7556,
"step": 11450
},
{
"epoch": 23.52,
"learning_rate": 1.3660880578112196e-05,
"loss": 0.7344,
"step": 11500
},
{
"epoch": 23.62,
"learning_rate": 1.3623631080980408e-05,
"loss": 0.7157,
"step": 11550
},
{
"epoch": 23.72,
"learning_rate": 1.358638158384862e-05,
"loss": 0.7216,
"step": 11600
},
{
"epoch": 23.82,
"learning_rate": 1.3549132086716829e-05,
"loss": 0.7447,
"step": 11650
},
{
"epoch": 23.93,
"learning_rate": 1.3511882589585041e-05,
"loss": 0.7263,
"step": 11700
},
{
"epoch": 24.03,
"learning_rate": 1.3474633092453253e-05,
"loss": 0.7215,
"step": 11750
},
{
"epoch": 24.13,
"learning_rate": 1.3437383595321465e-05,
"loss": 0.7492,
"step": 11800
},
{
"epoch": 24.23,
"learning_rate": 1.3400134098189677e-05,
"loss": 0.703,
"step": 11850
},
{
"epoch": 24.34,
"learning_rate": 1.3362884601057886e-05,
"loss": 0.7403,
"step": 11900
},
{
"epoch": 24.44,
"learning_rate": 1.3325635103926098e-05,
"loss": 0.7157,
"step": 11950
},
{
"epoch": 24.54,
"learning_rate": 1.3288385606794309e-05,
"loss": 0.7066,
"step": 12000
},
{
"epoch": 24.64,
"learning_rate": 1.3251136109662521e-05,
"loss": 0.7259,
"step": 12050
},
{
"epoch": 24.74,
"learning_rate": 1.3213886612530733e-05,
"loss": 0.6956,
"step": 12100
},
{
"epoch": 24.85,
"learning_rate": 1.3176637115398942e-05,
"loss": 0.7083,
"step": 12150
},
{
"epoch": 24.95,
"learning_rate": 1.3139387618267154e-05,
"loss": 0.7535,
"step": 12200
},
{
"epoch": 25.05,
"learning_rate": 1.3102138121135366e-05,
"loss": 0.7354,
"step": 12250
},
{
"epoch": 25.15,
"learning_rate": 1.3064888624003578e-05,
"loss": 0.6756,
"step": 12300
},
{
"epoch": 25.26,
"learning_rate": 1.3027639126871787e-05,
"loss": 0.7499,
"step": 12350
},
{
"epoch": 25.36,
"learning_rate": 1.2990389629739999e-05,
"loss": 0.7328,
"step": 12400
},
{
"epoch": 25.46,
"learning_rate": 1.2953140132608211e-05,
"loss": 0.7181,
"step": 12450
},
{
"epoch": 25.56,
"learning_rate": 1.2915890635476422e-05,
"loss": 0.7405,
"step": 12500
},
{
"epoch": 25.56,
"eval_loss": 1.081368327140808,
"eval_runtime": 5.5823,
"eval_samples_per_second": 254.374,
"eval_steps_per_second": 15.943,
"eval_wer": 0.5775619497140783,
"step": 12500
},
{
"epoch": 25.66,
"learning_rate": 1.2878641138344634e-05,
"loss": 0.7138,
"step": 12550
},
{
"epoch": 25.77,
"learning_rate": 1.2841391641212844e-05,
"loss": 0.6854,
"step": 12600
},
{
"epoch": 25.87,
"learning_rate": 1.2804142144081055e-05,
"loss": 0.7195,
"step": 12650
},
{
"epoch": 25.97,
"learning_rate": 1.2766892646949267e-05,
"loss": 0.6998,
"step": 12700
},
{
"epoch": 26.07,
"learning_rate": 1.2729643149817479e-05,
"loss": 0.7105,
"step": 12750
},
{
"epoch": 26.18,
"learning_rate": 1.2692393652685691e-05,
"loss": 0.6686,
"step": 12800
},
{
"epoch": 26.28,
"learning_rate": 1.26551441555539e-05,
"loss": 0.7408,
"step": 12850
},
{
"epoch": 26.38,
"learning_rate": 1.2617894658422112e-05,
"loss": 0.6875,
"step": 12900
},
{
"epoch": 26.48,
"learning_rate": 1.2580645161290324e-05,
"loss": 0.668,
"step": 12950
},
{
"epoch": 26.58,
"learning_rate": 1.2543395664158534e-05,
"loss": 0.7206,
"step": 13000
},
{
"epoch": 26.69,
"learning_rate": 1.2506146167026747e-05,
"loss": 0.7014,
"step": 13050
},
{
"epoch": 26.79,
"learning_rate": 1.2468896669894957e-05,
"loss": 0.7276,
"step": 13100
},
{
"epoch": 26.89,
"learning_rate": 1.2431647172763168e-05,
"loss": 0.6964,
"step": 13150
},
{
"epoch": 26.99,
"learning_rate": 1.239439767563138e-05,
"loss": 0.695,
"step": 13200
},
{
"epoch": 27.1,
"learning_rate": 1.2357148178499592e-05,
"loss": 0.6874,
"step": 13250
},
{
"epoch": 27.2,
"learning_rate": 1.2319898681367804e-05,
"loss": 0.6847,
"step": 13300
},
{
"epoch": 27.3,
"learning_rate": 1.2282649184236013e-05,
"loss": 0.6995,
"step": 13350
},
{
"epoch": 27.4,
"learning_rate": 1.2245399687104225e-05,
"loss": 0.725,
"step": 13400
},
{
"epoch": 27.51,
"learning_rate": 1.2208150189972437e-05,
"loss": 0.6871,
"step": 13450
},
{
"epoch": 27.61,
"learning_rate": 1.2170900692840647e-05,
"loss": 0.692,
"step": 13500
},
{
"epoch": 27.71,
"learning_rate": 1.213365119570886e-05,
"loss": 0.6804,
"step": 13550
},
{
"epoch": 27.81,
"learning_rate": 1.209640169857707e-05,
"loss": 0.684,
"step": 13600
},
{
"epoch": 27.91,
"learning_rate": 1.205915220144528e-05,
"loss": 0.7066,
"step": 13650
},
{
"epoch": 28.02,
"learning_rate": 1.2021902704313493e-05,
"loss": 0.6648,
"step": 13700
},
{
"epoch": 28.12,
"learning_rate": 1.1984653207181705e-05,
"loss": 0.6765,
"step": 13750
},
{
"epoch": 28.22,
"learning_rate": 1.1947403710049917e-05,
"loss": 0.6853,
"step": 13800
},
{
"epoch": 28.32,
"learning_rate": 1.1910154212918126e-05,
"loss": 0.6747,
"step": 13850
},
{
"epoch": 28.43,
"learning_rate": 1.1872904715786338e-05,
"loss": 0.6698,
"step": 13900
},
{
"epoch": 28.53,
"learning_rate": 1.183565521865455e-05,
"loss": 0.6562,
"step": 13950
},
{
"epoch": 28.63,
"learning_rate": 1.179840572152276e-05,
"loss": 0.6788,
"step": 14000
},
{
"epoch": 28.73,
"learning_rate": 1.1761156224390972e-05,
"loss": 0.6573,
"step": 14050
},
{
"epoch": 28.83,
"learning_rate": 1.1723906727259183e-05,
"loss": 0.6674,
"step": 14100
},
{
"epoch": 28.94,
"learning_rate": 1.1686657230127393e-05,
"loss": 0.6958,
"step": 14150
},
{
"epoch": 29.04,
"learning_rate": 1.1650152722938242e-05,
"loss": 0.6722,
"step": 14200
},
{
"epoch": 29.14,
"learning_rate": 1.1612903225806453e-05,
"loss": 0.7096,
"step": 14250
},
{
"epoch": 29.24,
"learning_rate": 1.1575653728674663e-05,
"loss": 0.6986,
"step": 14300
},
{
"epoch": 29.35,
"learning_rate": 1.1538404231542875e-05,
"loss": 0.654,
"step": 14350
},
{
"epoch": 29.45,
"learning_rate": 1.1501154734411086e-05,
"loss": 0.6551,
"step": 14400
},
{
"epoch": 29.55,
"learning_rate": 1.1463905237279298e-05,
"loss": 0.6623,
"step": 14450
},
{
"epoch": 29.65,
"learning_rate": 1.142665574014751e-05,
"loss": 0.74,
"step": 14500
},
{
"epoch": 29.75,
"learning_rate": 1.1389406243015719e-05,
"loss": 0.6907,
"step": 14550
},
{
"epoch": 29.86,
"learning_rate": 1.1352156745883931e-05,
"loss": 0.6797,
"step": 14600
},
{
"epoch": 29.96,
"learning_rate": 1.1314907248752143e-05,
"loss": 0.6425,
"step": 14650
},
{
"epoch": 30.06,
"learning_rate": 1.1277657751620355e-05,
"loss": 0.6819,
"step": 14700
},
{
"epoch": 30.16,
"learning_rate": 1.1240408254488566e-05,
"loss": 0.6632,
"step": 14750
},
{
"epoch": 30.27,
"learning_rate": 1.1203158757356776e-05,
"loss": 0.6504,
"step": 14800
},
{
"epoch": 30.37,
"learning_rate": 1.1165909260224988e-05,
"loss": 0.6434,
"step": 14850
},
{
"epoch": 30.47,
"learning_rate": 1.1128659763093199e-05,
"loss": 0.6586,
"step": 14900
},
{
"epoch": 30.57,
"learning_rate": 1.1091410265961411e-05,
"loss": 0.6842,
"step": 14950
},
{
"epoch": 30.67,
"learning_rate": 1.1054160768829623e-05,
"loss": 0.6432,
"step": 15000
},
{
"epoch": 30.67,
"eval_loss": 1.0632429122924805,
"eval_runtime": 5.5788,
"eval_samples_per_second": 254.536,
"eval_steps_per_second": 15.953,
"eval_wer": 0.5611327947717164,
"step": 15000
},
{
"epoch": 30.78,
"learning_rate": 1.1016911271697832e-05,
"loss": 0.6436,
"step": 15050
},
{
"epoch": 30.88,
"learning_rate": 1.0979661774566044e-05,
"loss": 0.6355,
"step": 15100
},
{
"epoch": 30.98,
"learning_rate": 1.0942412277434256e-05,
"loss": 0.6132,
"step": 15150
},
{
"epoch": 31.08,
"learning_rate": 1.0905162780302468e-05,
"loss": 0.6394,
"step": 15200
},
{
"epoch": 31.19,
"learning_rate": 1.0867913283170679e-05,
"loss": 0.6463,
"step": 15250
},
{
"epoch": 31.29,
"learning_rate": 1.0830663786038889e-05,
"loss": 0.6618,
"step": 15300
},
{
"epoch": 31.39,
"learning_rate": 1.07934142889071e-05,
"loss": 0.6488,
"step": 15350
},
{
"epoch": 31.49,
"learning_rate": 1.0756164791775312e-05,
"loss": 0.6096,
"step": 15400
},
{
"epoch": 31.6,
"learning_rate": 1.0718915294643524e-05,
"loss": 0.6595,
"step": 15450
},
{
"epoch": 31.7,
"learning_rate": 1.0681665797511736e-05,
"loss": 0.6379,
"step": 15500
},
{
"epoch": 31.8,
"learning_rate": 1.0644416300379945e-05,
"loss": 0.6512,
"step": 15550
},
{
"epoch": 31.9,
"learning_rate": 1.0607166803248157e-05,
"loss": 0.6294,
"step": 15600
},
{
"epoch": 32.0,
"learning_rate": 1.0569917306116369e-05,
"loss": 0.6473,
"step": 15650
},
{
"epoch": 32.11,
"learning_rate": 1.0532667808984581e-05,
"loss": 0.6284,
"step": 15700
},
{
"epoch": 32.21,
"learning_rate": 1.0495418311852791e-05,
"loss": 0.6451,
"step": 15750
},
{
"epoch": 32.31,
"learning_rate": 1.0458168814721002e-05,
"loss": 0.6226,
"step": 15800
},
{
"epoch": 32.41,
"learning_rate": 1.0420919317589212e-05,
"loss": 0.6219,
"step": 15850
},
{
"epoch": 32.52,
"learning_rate": 1.0383669820457424e-05,
"loss": 0.6312,
"step": 15900
},
{
"epoch": 32.62,
"learning_rate": 1.0346420323325637e-05,
"loss": 0.65,
"step": 15950
},
{
"epoch": 32.72,
"learning_rate": 1.0309170826193845e-05,
"loss": 0.6234,
"step": 16000
},
{
"epoch": 32.82,
"learning_rate": 1.0271921329062057e-05,
"loss": 0.63,
"step": 16050
},
{
"epoch": 32.92,
"learning_rate": 1.023467183193027e-05,
"loss": 0.6055,
"step": 16100
},
{
"epoch": 33.03,
"learning_rate": 1.0197422334798482e-05,
"loss": 0.6045,
"step": 16150
},
{
"epoch": 33.13,
"learning_rate": 1.0160172837666694e-05,
"loss": 0.604,
"step": 16200
},
{
"epoch": 33.23,
"learning_rate": 1.0122923340534903e-05,
"loss": 0.6345,
"step": 16250
},
{
"epoch": 33.33,
"learning_rate": 1.0085673843403115e-05,
"loss": 0.6033,
"step": 16300
},
{
"epoch": 33.44,
"learning_rate": 1.0048424346271325e-05,
"loss": 0.6291,
"step": 16350
},
{
"epoch": 33.54,
"learning_rate": 1.0011174849139537e-05,
"loss": 0.6049,
"step": 16400
},
{
"epoch": 33.64,
"learning_rate": 9.973925352007748e-06,
"loss": 0.6267,
"step": 16450
},
{
"epoch": 33.74,
"learning_rate": 9.93667585487596e-06,
"loss": 0.6422,
"step": 16500
},
{
"epoch": 33.84,
"learning_rate": 9.89942635774417e-06,
"loss": 0.6548,
"step": 16550
},
{
"epoch": 33.95,
"learning_rate": 9.862176860612382e-06,
"loss": 0.6218,
"step": 16600
},
{
"epoch": 34.05,
"learning_rate": 9.824927363480595e-06,
"loss": 0.6406,
"step": 16650
},
{
"epoch": 34.15,
"learning_rate": 9.787677866348805e-06,
"loss": 0.6268,
"step": 16700
},
{
"epoch": 34.25,
"learning_rate": 9.750428369217017e-06,
"loss": 0.6295,
"step": 16750
},
{
"epoch": 34.36,
"learning_rate": 9.713178872085228e-06,
"loss": 0.5852,
"step": 16800
},
{
"epoch": 34.46,
"learning_rate": 9.675929374953438e-06,
"loss": 0.5963,
"step": 16850
},
{
"epoch": 34.56,
"learning_rate": 9.63867987782165e-06,
"loss": 0.6169,
"step": 16900
},
{
"epoch": 34.66,
"learning_rate": 9.60143038068986e-06,
"loss": 0.6252,
"step": 16950
},
{
"epoch": 34.76,
"learning_rate": 9.564180883558073e-06,
"loss": 0.6152,
"step": 17000
},
{
"epoch": 34.87,
"learning_rate": 9.526931386426283e-06,
"loss": 0.6367,
"step": 17050
},
{
"epoch": 34.97,
"learning_rate": 9.489681889294495e-06,
"loss": 0.6432,
"step": 17100
},
{
"epoch": 35.07,
"learning_rate": 9.452432392162708e-06,
"loss": 0.6001,
"step": 17150
},
{
"epoch": 35.17,
"learning_rate": 9.415182895030918e-06,
"loss": 0.633,
"step": 17200
},
{
"epoch": 35.28,
"learning_rate": 9.37793339789913e-06,
"loss": 0.6418,
"step": 17250
},
{
"epoch": 35.38,
"learning_rate": 9.34068390076734e-06,
"loss": 0.5983,
"step": 17300
},
{
"epoch": 35.48,
"learning_rate": 9.303434403635551e-06,
"loss": 0.6485,
"step": 17350
},
{
"epoch": 35.58,
"learning_rate": 9.266184906503763e-06,
"loss": 0.5917,
"step": 17400
},
{
"epoch": 35.69,
"learning_rate": 9.228935409371974e-06,
"loss": 0.614,
"step": 17450
},
{
"epoch": 35.79,
"learning_rate": 9.191685912240186e-06,
"loss": 0.6507,
"step": 17500
},
{
"epoch": 35.79,
"eval_loss": 1.020020604133606,
"eval_runtime": 5.7202,
"eval_samples_per_second": 248.244,
"eval_steps_per_second": 15.559,
"eval_wer": 0.5427067259689571,
"step": 17500
},
{
"epoch": 35.89,
"learning_rate": 9.154436415108396e-06,
"loss": 0.6176,
"step": 17550
},
{
"epoch": 35.99,
"learning_rate": 9.117186917976608e-06,
"loss": 0.618,
"step": 17600
},
{
"epoch": 36.09,
"learning_rate": 9.07993742084482e-06,
"loss": 0.613,
"step": 17650
},
{
"epoch": 36.2,
"learning_rate": 9.04268792371303e-06,
"loss": 0.6029,
"step": 17700
},
{
"epoch": 36.3,
"learning_rate": 9.005438426581243e-06,
"loss": 0.6019,
"step": 17750
},
{
"epoch": 36.4,
"learning_rate": 8.968188929449453e-06,
"loss": 0.6066,
"step": 17800
},
{
"epoch": 36.5,
"learning_rate": 8.930939432317664e-06,
"loss": 0.6429,
"step": 17850
},
{
"epoch": 36.61,
"learning_rate": 8.893689935185876e-06,
"loss": 0.5887,
"step": 17900
},
{
"epoch": 36.71,
"learning_rate": 8.856440438054086e-06,
"loss": 0.604,
"step": 17950
},
{
"epoch": 36.81,
"learning_rate": 8.819190940922299e-06,
"loss": 0.585,
"step": 18000
},
{
"epoch": 36.91,
"learning_rate": 8.781941443790509e-06,
"loss": 0.648,
"step": 18050
},
{
"epoch": 37.01,
"learning_rate": 8.744691946658721e-06,
"loss": 0.6289,
"step": 18100
},
{
"epoch": 37.12,
"learning_rate": 8.707442449526932e-06,
"loss": 0.6071,
"step": 18150
},
{
"epoch": 37.22,
"learning_rate": 8.670192952395144e-06,
"loss": 0.6095,
"step": 18200
},
{
"epoch": 37.32,
"learning_rate": 8.632943455263356e-06,
"loss": 0.5943,
"step": 18250
},
{
"epoch": 37.42,
"learning_rate": 8.595693958131566e-06,
"loss": 0.6301,
"step": 18300
},
{
"epoch": 37.53,
"learning_rate": 8.558444460999777e-06,
"loss": 0.5924,
"step": 18350
},
{
"epoch": 37.63,
"learning_rate": 8.521194963867989e-06,
"loss": 0.5866,
"step": 18400
},
{
"epoch": 37.73,
"learning_rate": 8.4839454667362e-06,
"loss": 0.5944,
"step": 18450
},
{
"epoch": 37.83,
"learning_rate": 8.446695969604411e-06,
"loss": 0.5767,
"step": 18500
},
{
"epoch": 37.93,
"learning_rate": 8.409446472472622e-06,
"loss": 0.6003,
"step": 18550
},
{
"epoch": 38.04,
"learning_rate": 8.37294196528347e-06,
"loss": 0.5739,
"step": 18600
},
{
"epoch": 38.14,
"learning_rate": 8.33569246815168e-06,
"loss": 0.6051,
"step": 18650
},
{
"epoch": 38.24,
"learning_rate": 8.298442971019892e-06,
"loss": 0.5782,
"step": 18700
},
{
"epoch": 38.34,
"learning_rate": 8.261193473888102e-06,
"loss": 0.5972,
"step": 18750
},
{
"epoch": 38.45,
"learning_rate": 8.223943976756314e-06,
"loss": 0.596,
"step": 18800
},
{
"epoch": 38.55,
"learning_rate": 8.186694479624527e-06,
"loss": 0.5952,
"step": 18850
},
{
"epoch": 38.65,
"learning_rate": 8.149444982492737e-06,
"loss": 0.6043,
"step": 18900
},
{
"epoch": 38.75,
"learning_rate": 8.11219548536095e-06,
"loss": 0.5824,
"step": 18950
},
{
"epoch": 38.85,
"learning_rate": 8.07494598822916e-06,
"loss": 0.5873,
"step": 19000
},
{
"epoch": 38.96,
"learning_rate": 8.037696491097372e-06,
"loss": 0.6216,
"step": 19050
},
{
"epoch": 39.06,
"learning_rate": 8.000446993965582e-06,
"loss": 0.5793,
"step": 19100
},
{
"epoch": 39.16,
"learning_rate": 7.963197496833793e-06,
"loss": 0.5877,
"step": 19150
},
{
"epoch": 39.26,
"learning_rate": 7.925947999702005e-06,
"loss": 0.5761,
"step": 19200
},
{
"epoch": 39.37,
"learning_rate": 7.888698502570215e-06,
"loss": 0.5518,
"step": 19250
},
{
"epoch": 39.47,
"learning_rate": 7.851449005438427e-06,
"loss": 0.6275,
"step": 19300
},
{
"epoch": 39.57,
"learning_rate": 7.814199508306638e-06,
"loss": 0.5729,
"step": 19350
},
{
"epoch": 39.67,
"learning_rate": 7.77695001117485e-06,
"loss": 0.5893,
"step": 19400
},
{
"epoch": 39.78,
"learning_rate": 7.739700514043062e-06,
"loss": 0.5863,
"step": 19450
},
{
"epoch": 39.88,
"learning_rate": 7.702451016911272e-06,
"loss": 0.5664,
"step": 19500
},
{
"epoch": 39.98,
"learning_rate": 7.665201519779485e-06,
"loss": 0.5838,
"step": 19550
},
{
"epoch": 40.08,
"learning_rate": 7.627952022647694e-06,
"loss": 0.5747,
"step": 19600
},
{
"epoch": 40.18,
"learning_rate": 7.590702525515906e-06,
"loss": 0.5741,
"step": 19650
},
{
"epoch": 40.29,
"learning_rate": 7.553453028384118e-06,
"loss": 0.574,
"step": 19700
},
{
"epoch": 40.39,
"learning_rate": 7.516203531252328e-06,
"loss": 0.591,
"step": 19750
},
{
"epoch": 40.49,
"learning_rate": 7.47895403412054e-06,
"loss": 0.5645,
"step": 19800
},
{
"epoch": 40.59,
"learning_rate": 7.441704536988751e-06,
"loss": 0.581,
"step": 19850
},
{
"epoch": 40.7,
"learning_rate": 7.404455039856963e-06,
"loss": 0.5919,
"step": 19900
},
{
"epoch": 40.8,
"learning_rate": 7.367205542725174e-06,
"loss": 0.5882,
"step": 19950
},
{
"epoch": 40.9,
"learning_rate": 7.3299560455933845e-06,
"loss": 0.5533,
"step": 20000
},
{
"epoch": 40.9,
"eval_loss": 1.001917839050293,
"eval_runtime": 5.6355,
"eval_samples_per_second": 251.972,
"eval_steps_per_second": 15.793,
"eval_wer": 0.5367159843877644,
"step": 20000
},
{
"epoch": 41.0,
"learning_rate": 7.292706548461597e-06,
"loss": 0.5863,
"step": 20050
},
{
"epoch": 41.1,
"learning_rate": 7.255457051329807e-06,
"loss": 0.5712,
"step": 20100
},
{
"epoch": 41.21,
"learning_rate": 7.218207554198019e-06,
"loss": 0.5666,
"step": 20150
},
{
"epoch": 41.31,
"learning_rate": 7.1809580570662305e-06,
"loss": 0.564,
"step": 20200
},
{
"epoch": 41.41,
"learning_rate": 7.143708559934441e-06,
"loss": 0.5339,
"step": 20250
},
{
"epoch": 41.51,
"learning_rate": 7.107204052745289e-06,
"loss": 0.5874,
"step": 20300
},
{
"epoch": 41.62,
"learning_rate": 7.0699545556135e-06,
"loss": 0.57,
"step": 20350
},
{
"epoch": 41.72,
"learning_rate": 7.032705058481711e-06,
"loss": 0.5935,
"step": 20400
},
{
"epoch": 41.82,
"learning_rate": 6.995455561349922e-06,
"loss": 0.5489,
"step": 20450
},
{
"epoch": 41.92,
"learning_rate": 6.9582060642181335e-06,
"loss": 0.6135,
"step": 20500
},
{
"epoch": 42.02,
"learning_rate": 6.920956567086346e-06,
"loss": 0.5715,
"step": 20550
},
{
"epoch": 42.13,
"learning_rate": 6.883707069954556e-06,
"loss": 0.5698,
"step": 20600
},
{
"epoch": 42.23,
"learning_rate": 6.846457572822767e-06,
"loss": 0.556,
"step": 20650
},
{
"epoch": 42.33,
"learning_rate": 6.809208075690979e-06,
"loss": 0.5796,
"step": 20700
},
{
"epoch": 42.43,
"learning_rate": 6.77195857855919e-06,
"loss": 0.5742,
"step": 20750
},
{
"epoch": 42.54,
"learning_rate": 6.7347090814274004e-06,
"loss": 0.5598,
"step": 20800
},
{
"epoch": 42.64,
"learning_rate": 6.6974595842956126e-06,
"loss": 0.5841,
"step": 20850
},
{
"epoch": 42.74,
"learning_rate": 6.660210087163824e-06,
"loss": 0.5735,
"step": 20900
},
{
"epoch": 42.84,
"learning_rate": 6.622960590032035e-06,
"loss": 0.5692,
"step": 20950
},
{
"epoch": 42.94,
"learning_rate": 6.585711092900246e-06,
"loss": 0.5861,
"step": 21000
},
{
"epoch": 43.05,
"learning_rate": 6.548461595768457e-06,
"loss": 0.5383,
"step": 21050
},
{
"epoch": 43.15,
"learning_rate": 6.511212098636669e-06,
"loss": 0.5861,
"step": 21100
},
{
"epoch": 43.25,
"learning_rate": 6.47396260150488e-06,
"loss": 0.571,
"step": 21150
},
{
"epoch": 43.35,
"learning_rate": 6.4367131043730916e-06,
"loss": 0.552,
"step": 21200
},
{
"epoch": 43.46,
"learning_rate": 6.399463607241303e-06,
"loss": 0.5757,
"step": 21250
},
{
"epoch": 43.56,
"learning_rate": 6.362214110109513e-06,
"loss": 0.582,
"step": 21300
},
{
"epoch": 43.66,
"learning_rate": 6.3249646129777254e-06,
"loss": 0.5598,
"step": 21350
},
{
"epoch": 43.76,
"learning_rate": 6.287715115845937e-06,
"loss": 0.5842,
"step": 21400
},
{
"epoch": 43.87,
"learning_rate": 6.250465618714148e-06,
"loss": 0.5578,
"step": 21450
},
{
"epoch": 43.97,
"learning_rate": 6.213216121582359e-06,
"loss": 0.5297,
"step": 21500
},
{
"epoch": 44.07,
"learning_rate": 6.17596662445057e-06,
"loss": 0.5733,
"step": 21550
},
{
"epoch": 44.17,
"learning_rate": 6.138717127318782e-06,
"loss": 0.5656,
"step": 21600
},
{
"epoch": 44.27,
"learning_rate": 6.101467630186993e-06,
"loss": 0.6059,
"step": 21650
},
{
"epoch": 44.38,
"learning_rate": 6.0642181330552044e-06,
"loss": 0.5543,
"step": 21700
},
{
"epoch": 44.48,
"learning_rate": 6.026968635923416e-06,
"loss": 0.565,
"step": 21750
},
{
"epoch": 44.58,
"learning_rate": 5.989719138791626e-06,
"loss": 0.6275,
"step": 21800
},
{
"epoch": 44.68,
"learning_rate": 5.952469641659838e-06,
"loss": 0.5862,
"step": 21850
},
{
"epoch": 44.79,
"learning_rate": 5.915220144528049e-06,
"loss": 0.574,
"step": 21900
},
{
"epoch": 44.89,
"learning_rate": 5.877970647396261e-06,
"loss": 0.5824,
"step": 21950
},
{
"epoch": 44.99,
"learning_rate": 5.840721150264472e-06,
"loss": 0.5732,
"step": 22000
},
{
"epoch": 45.09,
"learning_rate": 5.803471653132683e-06,
"loss": 0.6061,
"step": 22050
},
{
"epoch": 45.19,
"learning_rate": 5.766222156000895e-06,
"loss": 0.5787,
"step": 22100
},
{
"epoch": 45.3,
"learning_rate": 5.728972658869105e-06,
"loss": 0.5657,
"step": 22150
},
{
"epoch": 45.4,
"learning_rate": 5.691723161737317e-06,
"loss": 0.5625,
"step": 22200
},
{
"epoch": 45.5,
"learning_rate": 5.654473664605529e-06,
"loss": 0.5705,
"step": 22250
},
{
"epoch": 45.6,
"learning_rate": 5.617224167473739e-06,
"loss": 0.5866,
"step": 22300
},
{
"epoch": 45.71,
"learning_rate": 5.579974670341951e-06,
"loss": 0.5612,
"step": 22350
},
{
"epoch": 45.81,
"learning_rate": 5.543470163152798e-06,
"loss": 0.5504,
"step": 22400
},
{
"epoch": 45.91,
"learning_rate": 5.506220666021009e-06,
"loss": 0.5683,
"step": 22450
},
{
"epoch": 46.01,
"learning_rate": 5.46897116888922e-06,
"loss": 0.561,
"step": 22500
},
{
"epoch": 46.01,
"eval_loss": 1.0245795249938965,
"eval_runtime": 5.5746,
"eval_samples_per_second": 254.728,
"eval_steps_per_second": 15.965,
"eval_wer": 0.5391667423073432,
"step": 22500
},
{
"epoch": 46.11,
"learning_rate": 5.431721671757432e-06,
"loss": 0.5509,
"step": 22550
},
{
"epoch": 46.22,
"learning_rate": 5.394472174625644e-06,
"loss": 0.5504,
"step": 22600
},
{
"epoch": 46.32,
"learning_rate": 5.357222677493854e-06,
"loss": 0.5648,
"step": 22650
},
{
"epoch": 46.42,
"learning_rate": 5.3199731803620655e-06,
"loss": 0.5443,
"step": 22700
},
{
"epoch": 46.52,
"learning_rate": 5.282723683230277e-06,
"loss": 0.5672,
"step": 22750
},
{
"epoch": 46.63,
"learning_rate": 5.245474186098488e-06,
"loss": 0.5423,
"step": 22800
},
{
"epoch": 46.73,
"learning_rate": 5.2082246889667e-06,
"loss": 0.5594,
"step": 22850
},
{
"epoch": 46.83,
"learning_rate": 5.170975191834911e-06,
"loss": 0.5534,
"step": 22900
},
{
"epoch": 46.93,
"learning_rate": 5.133725694703122e-06,
"loss": 0.5431,
"step": 22950
},
{
"epoch": 47.03,
"learning_rate": 5.096476197571333e-06,
"loss": 0.5624,
"step": 23000
},
{
"epoch": 47.14,
"learning_rate": 5.0592267004395445e-06,
"loss": 0.5671,
"step": 23050
},
{
"epoch": 47.24,
"learning_rate": 5.021977203307757e-06,
"loss": 0.5655,
"step": 23100
},
{
"epoch": 47.34,
"learning_rate": 4.984727706175967e-06,
"loss": 0.5674,
"step": 23150
},
{
"epoch": 47.44,
"learning_rate": 4.947478209044178e-06,
"loss": 0.5519,
"step": 23200
},
{
"epoch": 47.55,
"learning_rate": 4.91022871191239e-06,
"loss": 0.5367,
"step": 23250
},
{
"epoch": 47.65,
"learning_rate": 4.872979214780601e-06,
"loss": 0.5597,
"step": 23300
},
{
"epoch": 47.75,
"learning_rate": 4.835729717648812e-06,
"loss": 0.5737,
"step": 23350
},
{
"epoch": 47.85,
"learning_rate": 4.7984802205170235e-06,
"loss": 0.5495,
"step": 23400
},
{
"epoch": 47.96,
"learning_rate": 4.761230723385235e-06,
"loss": 0.5769,
"step": 23450
},
{
"epoch": 48.06,
"learning_rate": 4.723981226253446e-06,
"loss": 0.5333,
"step": 23500
},
{
"epoch": 48.16,
"learning_rate": 4.686731729121657e-06,
"loss": 0.5369,
"step": 23550
},
{
"epoch": 48.26,
"learning_rate": 4.649482231989869e-06,
"loss": 0.55,
"step": 23600
},
{
"epoch": 48.36,
"learning_rate": 4.61223273485808e-06,
"loss": 0.5634,
"step": 23650
},
{
"epoch": 48.47,
"learning_rate": 4.574983237726291e-06,
"loss": 0.5465,
"step": 23700
},
{
"epoch": 48.57,
"learning_rate": 4.537733740594502e-06,
"loss": 0.5571,
"step": 23750
},
{
"epoch": 48.67,
"learning_rate": 4.500484243462714e-06,
"loss": 0.5387,
"step": 23800
},
{
"epoch": 48.77,
"learning_rate": 4.463234746330925e-06,
"loss": 0.5546,
"step": 23850
},
{
"epoch": 48.88,
"learning_rate": 4.425985249199136e-06,
"loss": 0.5415,
"step": 23900
},
{
"epoch": 48.98,
"learning_rate": 4.388735752067348e-06,
"loss": 0.5613,
"step": 23950
},
{
"epoch": 49.08,
"learning_rate": 4.351486254935558e-06,
"loss": 0.5508,
"step": 24000
},
{
"epoch": 49.18,
"learning_rate": 4.31423675780377e-06,
"loss": 0.5546,
"step": 24050
},
{
"epoch": 49.28,
"learning_rate": 4.2769872606719816e-06,
"loss": 0.5409,
"step": 24100
},
{
"epoch": 49.39,
"learning_rate": 4.239737763540193e-06,
"loss": 0.537,
"step": 24150
},
{
"epoch": 49.49,
"learning_rate": 4.202488266408404e-06,
"loss": 0.536,
"step": 24200
},
{
"epoch": 49.59,
"learning_rate": 4.1652387692766146e-06,
"loss": 0.5474,
"step": 24250
},
{
"epoch": 49.69,
"learning_rate": 4.127989272144826e-06,
"loss": 0.5276,
"step": 24300
},
{
"epoch": 49.8,
"learning_rate": 4.090739775013038e-06,
"loss": 0.5622,
"step": 24350
},
{
"epoch": 49.9,
"learning_rate": 4.053490277881249e-06,
"loss": 0.572,
"step": 24400
},
{
"epoch": 50.0,
"learning_rate": 4.0162407807494606e-06,
"loss": 0.5675,
"step": 24450
},
{
"epoch": 50.1,
"learning_rate": 3.978991283617671e-06,
"loss": 0.5227,
"step": 24500
},
{
"epoch": 50.2,
"learning_rate": 3.9424867764285185e-06,
"loss": 0.5439,
"step": 24550
},
{
"epoch": 50.31,
"learning_rate": 3.90523727929673e-06,
"loss": 0.5623,
"step": 24600
},
{
"epoch": 50.41,
"learning_rate": 3.867987782164941e-06,
"loss": 0.5239,
"step": 24650
},
{
"epoch": 50.51,
"learning_rate": 3.830738285033152e-06,
"loss": 0.5418,
"step": 24700
},
{
"epoch": 50.61,
"learning_rate": 3.7934887879013636e-06,
"loss": 0.5636,
"step": 24750
},
{
"epoch": 50.72,
"learning_rate": 3.756239290769575e-06,
"loss": 0.545,
"step": 24800
},
{
"epoch": 50.82,
"learning_rate": 3.718989793637786e-06,
"loss": 0.5369,
"step": 24850
},
{
"epoch": 50.92,
"learning_rate": 3.6817402965059975e-06,
"loss": 0.548,
"step": 24900
},
{
"epoch": 51.02,
"learning_rate": 3.644490799374209e-06,
"loss": 0.541,
"step": 24950
},
{
"epoch": 51.12,
"learning_rate": 3.60724130224242e-06,
"loss": 0.5292,
"step": 25000
},
{
"epoch": 51.12,
"eval_loss": 0.999214768409729,
"eval_runtime": 5.7806,
"eval_samples_per_second": 245.65,
"eval_steps_per_second": 15.396,
"eval_wer": 0.5244621947898702,
"step": 25000
},
{
"epoch": 51.23,
"learning_rate": 3.5699918051106313e-06,
"loss": 0.5476,
"step": 25050
},
{
"epoch": 51.33,
"learning_rate": 3.5327423079788426e-06,
"loss": 0.553,
"step": 25100
},
{
"epoch": 51.43,
"learning_rate": 3.495492810847054e-06,
"loss": 0.5738,
"step": 25150
},
{
"epoch": 51.53,
"learning_rate": 3.4582433137152648e-06,
"loss": 0.5532,
"step": 25200
},
{
"epoch": 51.64,
"learning_rate": 3.4209938165834765e-06,
"loss": 0.5606,
"step": 25250
},
{
"epoch": 51.74,
"learning_rate": 3.3837443194516878e-06,
"loss": 0.5551,
"step": 25300
},
{
"epoch": 51.84,
"learning_rate": 3.346494822319899e-06,
"loss": 0.5588,
"step": 25350
},
{
"epoch": 51.94,
"learning_rate": 3.3092453251881103e-06,
"loss": 0.544,
"step": 25400
},
{
"epoch": 52.04,
"learning_rate": 3.271995828056321e-06,
"loss": 0.5702,
"step": 25450
},
{
"epoch": 52.15,
"learning_rate": 3.234746330924533e-06,
"loss": 0.5414,
"step": 25500
},
{
"epoch": 52.25,
"learning_rate": 3.197496833792744e-06,
"loss": 0.5565,
"step": 25550
},
{
"epoch": 52.35,
"learning_rate": 3.1602473366609555e-06,
"loss": 0.5664,
"step": 25600
},
{
"epoch": 52.45,
"learning_rate": 3.1229978395291668e-06,
"loss": 0.5323,
"step": 25650
},
{
"epoch": 52.56,
"learning_rate": 3.0857483423973776e-06,
"loss": 0.5358,
"step": 25700
},
{
"epoch": 52.66,
"learning_rate": 3.048498845265589e-06,
"loss": 0.5411,
"step": 25750
},
{
"epoch": 52.76,
"learning_rate": 3.0112493481338006e-06,
"loss": 0.559,
"step": 25800
},
{
"epoch": 52.86,
"learning_rate": 2.973999851002012e-06,
"loss": 0.5627,
"step": 25850
},
{
"epoch": 52.97,
"learning_rate": 2.936750353870223e-06,
"loss": 0.5341,
"step": 25900
},
{
"epoch": 53.07,
"learning_rate": 2.899500856738434e-06,
"loss": 0.5472,
"step": 25950
},
{
"epoch": 53.17,
"learning_rate": 2.8622513596066454e-06,
"loss": 0.5345,
"step": 26000
},
{
"epoch": 53.27,
"learning_rate": 2.825001862474857e-06,
"loss": 0.5602,
"step": 26050
},
{
"epoch": 53.37,
"learning_rate": 2.7877523653430684e-06,
"loss": 0.5905,
"step": 26100
},
{
"epoch": 53.48,
"learning_rate": 2.7505028682112792e-06,
"loss": 0.5738,
"step": 26150
},
{
"epoch": 53.58,
"learning_rate": 2.7132533710794905e-06,
"loss": 0.5178,
"step": 26200
},
{
"epoch": 53.68,
"learning_rate": 2.676003873947702e-06,
"loss": 0.5309,
"step": 26250
},
{
"epoch": 53.78,
"learning_rate": 2.638754376815913e-06,
"loss": 0.5285,
"step": 26300
},
{
"epoch": 53.89,
"learning_rate": 2.601504879684125e-06,
"loss": 0.546,
"step": 26350
},
{
"epoch": 53.99,
"learning_rate": 2.5642553825523357e-06,
"loss": 0.5513,
"step": 26400
},
{
"epoch": 54.09,
"learning_rate": 2.527005885420547e-06,
"loss": 0.5802,
"step": 26450
},
{
"epoch": 54.19,
"learning_rate": 2.4897563882887582e-06,
"loss": 0.5409,
"step": 26500
},
{
"epoch": 54.29,
"learning_rate": 2.4525068911569695e-06,
"loss": 0.527,
"step": 26550
},
{
"epoch": 54.4,
"learning_rate": 2.415257394025181e-06,
"loss": 0.5654,
"step": 26600
},
{
"epoch": 54.5,
"learning_rate": 2.378007896893392e-06,
"loss": 0.5491,
"step": 26650
},
{
"epoch": 54.6,
"learning_rate": 2.3407583997616034e-06,
"loss": 0.5611,
"step": 26700
},
{
"epoch": 54.7,
"learning_rate": 2.3035089026298147e-06,
"loss": 0.5402,
"step": 26750
},
{
"epoch": 54.81,
"learning_rate": 2.266259405498026e-06,
"loss": 0.5158,
"step": 26800
},
{
"epoch": 54.91,
"learning_rate": 2.2290099083662373e-06,
"loss": 0.5668,
"step": 26850
},
{
"epoch": 55.01,
"learning_rate": 2.1917604112344485e-06,
"loss": 0.5349,
"step": 26900
},
{
"epoch": 55.11,
"learning_rate": 2.15451091410266e-06,
"loss": 0.5656,
"step": 26950
},
{
"epoch": 55.21,
"learning_rate": 2.117261416970871e-06,
"loss": 0.5493,
"step": 27000
},
{
"epoch": 55.32,
"learning_rate": 2.0800119198390824e-06,
"loss": 0.5667,
"step": 27050
},
{
"epoch": 55.42,
"learning_rate": 2.0427624227072937e-06,
"loss": 0.5455,
"step": 27100
},
{
"epoch": 55.52,
"learning_rate": 2.0062579155181407e-06,
"loss": 0.5746,
"step": 27150
},
{
"epoch": 55.62,
"learning_rate": 1.969008418386352e-06,
"loss": 0.5252,
"step": 27200
},
{
"epoch": 55.73,
"learning_rate": 1.9317589212545633e-06,
"loss": 0.5417,
"step": 27250
},
{
"epoch": 55.83,
"learning_rate": 1.8945094241227746e-06,
"loss": 0.5651,
"step": 27300
},
{
"epoch": 55.93,
"learning_rate": 1.8572599269909859e-06,
"loss": 0.5276,
"step": 27350
},
{
"epoch": 56.03,
"learning_rate": 1.820010429859197e-06,
"loss": 0.5389,
"step": 27400
},
{
"epoch": 56.13,
"learning_rate": 1.7827609327274084e-06,
"loss": 0.5331,
"step": 27450
},
{
"epoch": 56.24,
"learning_rate": 1.7455114355956195e-06,
"loss": 0.5085,
"step": 27500
},
{
"epoch": 56.24,
"eval_loss": 1.0164018869400024,
"eval_runtime": 5.7221,
"eval_samples_per_second": 248.159,
"eval_steps_per_second": 15.554,
"eval_wer": 0.5323590814196242,
"step": 27500
},
{
"epoch": 56.34,
"learning_rate": 1.7082619384638308e-06,
"loss": 0.5353,
"step": 27550
},
{
"epoch": 56.44,
"learning_rate": 1.6710124413320423e-06,
"loss": 0.5365,
"step": 27600
},
{
"epoch": 56.54,
"learning_rate": 1.6337629442002534e-06,
"loss": 0.5323,
"step": 27650
},
{
"epoch": 56.65,
"learning_rate": 1.5965134470684649e-06,
"loss": 0.5273,
"step": 27700
},
{
"epoch": 56.75,
"learning_rate": 1.559263949936676e-06,
"loss": 0.5435,
"step": 27750
},
{
"epoch": 56.85,
"learning_rate": 1.5220144528048872e-06,
"loss": 0.5362,
"step": 27800
},
{
"epoch": 56.95,
"learning_rate": 1.4847649556730987e-06,
"loss": 0.5242,
"step": 27850
},
{
"epoch": 57.06,
"learning_rate": 1.4475154585413098e-06,
"loss": 0.542,
"step": 27900
},
{
"epoch": 57.16,
"learning_rate": 1.410265961409521e-06,
"loss": 0.5069,
"step": 27950
},
{
"epoch": 57.26,
"learning_rate": 1.3730164642777324e-06,
"loss": 0.5287,
"step": 28000
},
{
"epoch": 57.36,
"learning_rate": 1.3357669671459437e-06,
"loss": 0.5569,
"step": 28050
},
{
"epoch": 57.46,
"learning_rate": 1.2985174700141548e-06,
"loss": 0.546,
"step": 28100
},
{
"epoch": 57.57,
"learning_rate": 1.2612679728823663e-06,
"loss": 0.5211,
"step": 28150
},
{
"epoch": 57.67,
"learning_rate": 1.2240184757505773e-06,
"loss": 0.5389,
"step": 28200
},
{
"epoch": 57.77,
"learning_rate": 1.1867689786187888e-06,
"loss": 0.5294,
"step": 28250
},
{
"epoch": 57.87,
"learning_rate": 1.1495194814870001e-06,
"loss": 0.5307,
"step": 28300
},
{
"epoch": 57.98,
"learning_rate": 1.1122699843552114e-06,
"loss": 0.5206,
"step": 28350
},
{
"epoch": 58.08,
"learning_rate": 1.0750204872234225e-06,
"loss": 0.5106,
"step": 28400
},
{
"epoch": 58.18,
"learning_rate": 1.0377709900916338e-06,
"loss": 0.5513,
"step": 28450
},
{
"epoch": 58.28,
"learning_rate": 1.000521492959845e-06,
"loss": 0.5401,
"step": 28500
},
{
"epoch": 58.38,
"learning_rate": 9.632719958280563e-07,
"loss": 0.5303,
"step": 28550
},
{
"epoch": 58.49,
"learning_rate": 9.260224986962676e-07,
"loss": 0.5416,
"step": 28600
},
{
"epoch": 58.59,
"learning_rate": 8.887730015644789e-07,
"loss": 0.5214,
"step": 28650
},
{
"epoch": 58.69,
"learning_rate": 8.515235044326903e-07,
"loss": 0.527,
"step": 28700
},
{
"epoch": 58.79,
"learning_rate": 8.142740073009015e-07,
"loss": 0.5413,
"step": 28750
},
{
"epoch": 58.9,
"learning_rate": 7.770245101691128e-07,
"loss": 0.523,
"step": 28800
},
{
"epoch": 59.0,
"learning_rate": 7.397750130373241e-07,
"loss": 0.5474,
"step": 28850
},
{
"epoch": 59.1,
"learning_rate": 7.025255159055354e-07,
"loss": 0.5501,
"step": 28900
},
{
"epoch": 59.2,
"learning_rate": 6.652760187737465e-07,
"loss": 0.5168,
"step": 28950
},
{
"epoch": 59.3,
"learning_rate": 6.280265216419578e-07,
"loss": 0.5345,
"step": 29000
},
{
"epoch": 59.41,
"learning_rate": 5.907770245101692e-07,
"loss": 0.4981,
"step": 29050
},
{
"epoch": 59.51,
"learning_rate": 5.535275273783804e-07,
"loss": 0.5467,
"step": 29100
},
{
"epoch": 59.61,
"learning_rate": 5.162780302465918e-07,
"loss": 0.5244,
"step": 29150
},
{
"epoch": 59.71,
"learning_rate": 4.79028533114803e-07,
"loss": 0.537,
"step": 29200
},
{
"epoch": 59.82,
"learning_rate": 4.417790359830142e-07,
"loss": 0.5479,
"step": 29250
},
{
"epoch": 59.92,
"learning_rate": 4.0452953885122555e-07,
"loss": 0.543,
"step": 29300
},
{
"epoch": 60.02,
"learning_rate": 3.672800417194368e-07,
"loss": 0.5484,
"step": 29350
},
{
"epoch": 60.12,
"learning_rate": 3.300305445876481e-07,
"loss": 0.5378,
"step": 29400
},
{
"epoch": 60.22,
"learning_rate": 2.9278104745585936e-07,
"loss": 0.5348,
"step": 29450
},
{
"epoch": 60.33,
"learning_rate": 2.5553155032407065e-07,
"loss": 0.5335,
"step": 29500
},
{
"epoch": 60.43,
"learning_rate": 2.182820531922819e-07,
"loss": 0.5108,
"step": 29550
},
{
"epoch": 60.53,
"learning_rate": 1.81777546003129e-07,
"loss": 0.5365,
"step": 29600
},
{
"epoch": 60.63,
"learning_rate": 1.4452804887134025e-07,
"loss": 0.5546,
"step": 29650
},
{
"epoch": 60.74,
"learning_rate": 1.0727855173955152e-07,
"loss": 0.5508,
"step": 29700
},
{
"epoch": 60.84,
"learning_rate": 7.00290546077628e-08,
"loss": 0.5377,
"step": 29750
},
{
"epoch": 60.94,
"learning_rate": 3.277955747597408e-08,
"loss": 0.5273,
"step": 29800
},
{
"epoch": 61.0,
"step": 29829,
"total_flos": 1.766781652210423e+20,
"train_loss": 1.419654459913184,
"train_runtime": 9492.3385,
"train_samples_per_second": 125.678,
"train_steps_per_second": 3.142
}
],
"logging_steps": 50,
"max_steps": 29829,
"num_train_epochs": 61,
"save_steps": 500,
"total_flos": 1.766781652210423e+20,
"trial_name": null,
"trial_params": null
}