|
{
|
|
"best_metric": 0.1458934662818158,
|
|
"best_model_checkpoint": "results15\\checkpoint-64000",
|
|
"epoch": 1.151192,
|
|
"eval_steps": 4000,
|
|
"global_step": 68000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0008,
|
|
"grad_norm": 12.430644989013672,
|
|
"learning_rate": 1.8800000000000002e-06,
|
|
"loss": 1.5617,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.0016,
|
|
"grad_norm": 11.342061042785645,
|
|
"learning_rate": 3.88e-06,
|
|
"loss": 0.7924,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.0024,
|
|
"grad_norm": 6.638418674468994,
|
|
"learning_rate": 5.8800000000000005e-06,
|
|
"loss": 0.3435,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.0032,
|
|
"grad_norm": 19.131393432617188,
|
|
"learning_rate": 7.88e-06,
|
|
"loss": 0.2877,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.004,
|
|
"grad_norm": 11.609911918640137,
|
|
"learning_rate": 9.88e-06,
|
|
"loss": 0.3076,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.0048,
|
|
"grad_norm": 8.114494323730469,
|
|
"learning_rate": 9.992449799196789e-06,
|
|
"loss": 0.2996,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.0056,
|
|
"grad_norm": 11.082118034362793,
|
|
"learning_rate": 9.984417670682733e-06,
|
|
"loss": 0.3146,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.0064,
|
|
"grad_norm": 10.258833885192871,
|
|
"learning_rate": 9.976385542168675e-06,
|
|
"loss": 0.2866,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.0072,
|
|
"grad_norm": 4.887807369232178,
|
|
"learning_rate": 9.968353413654619e-06,
|
|
"loss": 0.2909,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.008,
|
|
"grad_norm": 6.694243431091309,
|
|
"learning_rate": 9.960321285140563e-06,
|
|
"loss": 0.3067,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.0088,
|
|
"grad_norm": 9.998162269592285,
|
|
"learning_rate": 9.952289156626507e-06,
|
|
"loss": 0.305,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.0096,
|
|
"grad_norm": 14.1092529296875,
|
|
"learning_rate": 9.94425702811245e-06,
|
|
"loss": 0.2772,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.0104,
|
|
"grad_norm": 11.840794563293457,
|
|
"learning_rate": 9.936224899598395e-06,
|
|
"loss": 0.3012,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.0112,
|
|
"grad_norm": 9.802504539489746,
|
|
"learning_rate": 9.928192771084338e-06,
|
|
"loss": 0.2899,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.012,
|
|
"grad_norm": 21.836496353149414,
|
|
"learning_rate": 9.920160642570282e-06,
|
|
"loss": 0.3063,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.0128,
|
|
"grad_norm": 8.876431465148926,
|
|
"learning_rate": 9.912128514056226e-06,
|
|
"loss": 0.3292,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.0136,
|
|
"grad_norm": 13.00693416595459,
|
|
"learning_rate": 9.904096385542169e-06,
|
|
"loss": 0.305,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.0144,
|
|
"grad_norm": 10.583467483520508,
|
|
"learning_rate": 9.896064257028112e-06,
|
|
"loss": 0.2992,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.0152,
|
|
"grad_norm": 10.112874031066895,
|
|
"learning_rate": 9.888032128514056e-06,
|
|
"loss": 0.2896,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.016,
|
|
"grad_norm": 7.511252403259277,
|
|
"learning_rate": 9.88e-06,
|
|
"loss": 0.3163,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.0168,
|
|
"grad_norm": 9.708840370178223,
|
|
"learning_rate": 9.871967871485944e-06,
|
|
"loss": 0.274,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.0176,
|
|
"grad_norm": 8.038116455078125,
|
|
"learning_rate": 9.863935742971888e-06,
|
|
"loss": 0.2818,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.0184,
|
|
"grad_norm": 9.183858871459961,
|
|
"learning_rate": 9.855903614457832e-06,
|
|
"loss": 0.2909,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.0192,
|
|
"grad_norm": 13.90609073638916,
|
|
"learning_rate": 9.847871485943776e-06,
|
|
"loss": 0.2839,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"grad_norm": 17.10437774658203,
|
|
"learning_rate": 9.83983935742972e-06,
|
|
"loss": 0.3,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.0208,
|
|
"grad_norm": 19.14161491394043,
|
|
"learning_rate": 9.831807228915664e-06,
|
|
"loss": 0.2813,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.0216,
|
|
"grad_norm": 13.615035057067871,
|
|
"learning_rate": 9.823775100401608e-06,
|
|
"loss": 0.3083,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.0224,
|
|
"grad_norm": 15.303385734558105,
|
|
"learning_rate": 9.81574297188755e-06,
|
|
"loss": 0.2885,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.0232,
|
|
"grad_norm": 13.770358085632324,
|
|
"learning_rate": 9.807710843373494e-06,
|
|
"loss": 0.2998,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.024,
|
|
"grad_norm": 8.218709945678711,
|
|
"learning_rate": 9.799678714859438e-06,
|
|
"loss": 0.3097,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.0248,
|
|
"grad_norm": 11.543137550354004,
|
|
"learning_rate": 9.791646586345382e-06,
|
|
"loss": 0.3023,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.0256,
|
|
"grad_norm": 17.413169860839844,
|
|
"learning_rate": 9.783614457831326e-06,
|
|
"loss": 0.2775,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.0264,
|
|
"grad_norm": 11.18759536743164,
|
|
"learning_rate": 9.77558232931727e-06,
|
|
"loss": 0.2624,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.0272,
|
|
"grad_norm": 9.198080062866211,
|
|
"learning_rate": 9.767550200803213e-06,
|
|
"loss": 0.2866,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.028,
|
|
"grad_norm": 11.68255615234375,
|
|
"learning_rate": 9.759518072289157e-06,
|
|
"loss": 0.2834,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.0288,
|
|
"grad_norm": 5.951743125915527,
|
|
"learning_rate": 9.751485943775101e-06,
|
|
"loss": 0.2766,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.0296,
|
|
"grad_norm": 11.82551383972168,
|
|
"learning_rate": 9.743453815261045e-06,
|
|
"loss": 0.2837,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.0304,
|
|
"grad_norm": 12.415241241455078,
|
|
"learning_rate": 9.735421686746989e-06,
|
|
"loss": 0.2834,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.0312,
|
|
"grad_norm": 11.2643404006958,
|
|
"learning_rate": 9.727389558232933e-06,
|
|
"loss": 0.2929,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.032,
|
|
"grad_norm": 10.987563133239746,
|
|
"learning_rate": 9.719357429718877e-06,
|
|
"loss": 0.2566,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.032,
|
|
"eval_test1_cer": 0.14171147957278363,
|
|
"eval_test1_cer_norm": 0.10705693087137129,
|
|
"eval_test1_loss": 0.265484482049942,
|
|
"eval_test1_runtime": 1239.3947,
|
|
"eval_test1_samples_per_second": 2.017,
|
|
"eval_test1_steps_per_second": 0.504,
|
|
"eval_test1_wer": 0.30916353246450334,
|
|
"eval_test1_wer_norm": 0.24156170548525674,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.032,
|
|
"eval_test2_cer": 0.26862377272557586,
|
|
"eval_test2_cer_norm": 0.208736829872947,
|
|
"eval_test2_loss": 0.44017764925956726,
|
|
"eval_test2_runtime": 1372.2097,
|
|
"eval_test2_samples_per_second": 1.822,
|
|
"eval_test2_steps_per_second": 0.455,
|
|
"eval_test2_wer": 0.47839894712748915,
|
|
"eval_test2_wer_norm": 0.4037073114829246,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.0328,
|
|
"grad_norm": 10.9562406539917,
|
|
"learning_rate": 9.711325301204821e-06,
|
|
"loss": 0.3019,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.0336,
|
|
"grad_norm": 5.758121013641357,
|
|
"learning_rate": 9.703293172690765e-06,
|
|
"loss": 0.2978,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.0344,
|
|
"grad_norm": 10.012574195861816,
|
|
"learning_rate": 9.695261044176709e-06,
|
|
"loss": 0.295,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.0352,
|
|
"grad_norm": 8.023552894592285,
|
|
"learning_rate": 9.687228915662651e-06,
|
|
"loss": 0.2642,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.036,
|
|
"grad_norm": 7.264355182647705,
|
|
"learning_rate": 9.679196787148595e-06,
|
|
"loss": 0.2856,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.0368,
|
|
"grad_norm": 17.713090896606445,
|
|
"learning_rate": 9.671164658634539e-06,
|
|
"loss": 0.2838,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.0376,
|
|
"grad_norm": 6.670265197753906,
|
|
"learning_rate": 9.663132530120483e-06,
|
|
"loss": 0.2555,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.0384,
|
|
"grad_norm": 8.260579109191895,
|
|
"learning_rate": 9.655100401606427e-06,
|
|
"loss": 0.2664,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.0392,
|
|
"grad_norm": 6.691250801086426,
|
|
"learning_rate": 9.64706827309237e-06,
|
|
"loss": 0.2731,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"grad_norm": 9.206613540649414,
|
|
"learning_rate": 9.639036144578314e-06,
|
|
"loss": 0.2425,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.0408,
|
|
"grad_norm": 8.662139892578125,
|
|
"learning_rate": 9.631004016064258e-06,
|
|
"loss": 0.2774,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.0416,
|
|
"grad_norm": 7.598929405212402,
|
|
"learning_rate": 9.622971887550202e-06,
|
|
"loss": 0.2874,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.0424,
|
|
"grad_norm": 6.341280937194824,
|
|
"learning_rate": 9.614939759036145e-06,
|
|
"loss": 0.2905,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.0432,
|
|
"grad_norm": 9.12799072265625,
|
|
"learning_rate": 9.606907630522088e-06,
|
|
"loss": 0.2913,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.044,
|
|
"grad_norm": 6.347501754760742,
|
|
"learning_rate": 9.598875502008032e-06,
|
|
"loss": 0.2809,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.0448,
|
|
"grad_norm": 8.827099800109863,
|
|
"learning_rate": 9.590843373493976e-06,
|
|
"loss": 0.2826,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.0456,
|
|
"grad_norm": 12.478800773620605,
|
|
"learning_rate": 9.58281124497992e-06,
|
|
"loss": 0.2742,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.0464,
|
|
"grad_norm": 8.185443878173828,
|
|
"learning_rate": 9.574779116465864e-06,
|
|
"loss": 0.2874,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.0472,
|
|
"grad_norm": 29.066831588745117,
|
|
"learning_rate": 9.566746987951808e-06,
|
|
"loss": 0.2753,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.048,
|
|
"grad_norm": 8.861905097961426,
|
|
"learning_rate": 9.558714859437752e-06,
|
|
"loss": 0.2649,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.0488,
|
|
"grad_norm": 11.275808334350586,
|
|
"learning_rate": 9.550682730923696e-06,
|
|
"loss": 0.2835,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.0496,
|
|
"grad_norm": 7.473984241485596,
|
|
"learning_rate": 9.542650602409638e-06,
|
|
"loss": 0.2571,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.0504,
|
|
"grad_norm": 6.271098613739014,
|
|
"learning_rate": 9.534618473895582e-06,
|
|
"loss": 0.2563,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.0512,
|
|
"grad_norm": 7.11587381362915,
|
|
"learning_rate": 9.526586345381526e-06,
|
|
"loss": 0.2642,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.052,
|
|
"grad_norm": 6.847942352294922,
|
|
"learning_rate": 9.51855421686747e-06,
|
|
"loss": 0.2651,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.0528,
|
|
"grad_norm": 9.094855308532715,
|
|
"learning_rate": 9.510522088353414e-06,
|
|
"loss": 0.283,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.0536,
|
|
"grad_norm": 7.255010604858398,
|
|
"learning_rate": 9.502489959839358e-06,
|
|
"loss": 0.2583,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.0544,
|
|
"grad_norm": 10.873835563659668,
|
|
"learning_rate": 9.494457831325302e-06,
|
|
"loss": 0.2707,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.0552,
|
|
"grad_norm": 9.488873481750488,
|
|
"learning_rate": 9.486425702811246e-06,
|
|
"loss": 0.2759,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.056,
|
|
"grad_norm": 8.395374298095703,
|
|
"learning_rate": 9.47839357429719e-06,
|
|
"loss": 0.2754,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 0.0568,
|
|
"grad_norm": 8.9957857131958,
|
|
"learning_rate": 9.470361445783133e-06,
|
|
"loss": 0.2672,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 0.0576,
|
|
"grad_norm": 12.339411735534668,
|
|
"learning_rate": 9.462329317269077e-06,
|
|
"loss": 0.2651,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.0584,
|
|
"grad_norm": 6.282168388366699,
|
|
"learning_rate": 9.454297188755021e-06,
|
|
"loss": 0.2746,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 0.0592,
|
|
"grad_norm": 11.324309349060059,
|
|
"learning_rate": 9.446265060240965e-06,
|
|
"loss": 0.2659,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"grad_norm": 19.300565719604492,
|
|
"learning_rate": 9.438232931726909e-06,
|
|
"loss": 0.2594,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 0.0608,
|
|
"grad_norm": 9.297099113464355,
|
|
"learning_rate": 9.430200803212853e-06,
|
|
"loss": 0.2841,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 0.0616,
|
|
"grad_norm": 8.913684844970703,
|
|
"learning_rate": 9.422168674698797e-06,
|
|
"loss": 0.2468,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 0.0624,
|
|
"grad_norm": 9.521671295166016,
|
|
"learning_rate": 9.414136546184741e-06,
|
|
"loss": 0.2612,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 0.0632,
|
|
"grad_norm": 4.213898181915283,
|
|
"learning_rate": 9.406104417670685e-06,
|
|
"loss": 0.2692,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 0.064,
|
|
"grad_norm": 12.735309600830078,
|
|
"learning_rate": 9.398072289156627e-06,
|
|
"loss": 0.2556,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.064,
|
|
"eval_test1_cer": 0.10348046904175069,
|
|
"eval_test1_cer_norm": 0.07815208778543473,
|
|
"eval_test1_loss": 0.24871498346328735,
|
|
"eval_test1_runtime": 1203.5945,
|
|
"eval_test1_samples_per_second": 2.077,
|
|
"eval_test1_steps_per_second": 0.519,
|
|
"eval_test1_wer": 0.2385200734715298,
|
|
"eval_test1_wer_norm": 0.17586720827610391,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.064,
|
|
"eval_test2_cer": 0.17687684324485758,
|
|
"eval_test2_cer_norm": 0.13818465292841647,
|
|
"eval_test2_loss": 0.41706421971321106,
|
|
"eval_test2_runtime": 1259.4129,
|
|
"eval_test2_samples_per_second": 1.985,
|
|
"eval_test2_steps_per_second": 0.496,
|
|
"eval_test2_wer": 0.3423552300297551,
|
|
"eval_test2_wer_norm": 0.27343570937428374,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.0648,
|
|
"grad_norm": 13.473925590515137,
|
|
"learning_rate": 9.390040160642571e-06,
|
|
"loss": 0.282,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 0.0656,
|
|
"grad_norm": 10.660150527954102,
|
|
"learning_rate": 9.382008032128515e-06,
|
|
"loss": 0.2741,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 0.0664,
|
|
"grad_norm": 6.230668544769287,
|
|
"learning_rate": 9.373975903614459e-06,
|
|
"loss": 0.2686,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 0.0672,
|
|
"grad_norm": 5.246466159820557,
|
|
"learning_rate": 9.365943775100403e-06,
|
|
"loss": 0.2382,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 0.068,
|
|
"grad_norm": 5.55143928527832,
|
|
"learning_rate": 9.357911646586347e-06,
|
|
"loss": 0.2386,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 0.0688,
|
|
"grad_norm": 7.775327682495117,
|
|
"learning_rate": 9.34987951807229e-06,
|
|
"loss": 0.2767,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 0.0696,
|
|
"grad_norm": 9.929217338562012,
|
|
"learning_rate": 9.341847389558234e-06,
|
|
"loss": 0.241,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 0.0704,
|
|
"grad_norm": 6.488868236541748,
|
|
"learning_rate": 9.333815261044178e-06,
|
|
"loss": 0.2761,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 0.0712,
|
|
"grad_norm": 10.308218955993652,
|
|
"learning_rate": 9.325783132530122e-06,
|
|
"loss": 0.2927,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 0.072,
|
|
"grad_norm": 16.63512420654297,
|
|
"learning_rate": 9.317751004016065e-06,
|
|
"loss": 0.262,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 0.0728,
|
|
"grad_norm": 8.194585800170898,
|
|
"learning_rate": 9.309718875502008e-06,
|
|
"loss": 0.2535,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 0.0736,
|
|
"grad_norm": 10.36143970489502,
|
|
"learning_rate": 9.301686746987952e-06,
|
|
"loss": 0.2692,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 0.0744,
|
|
"grad_norm": 8.524481773376465,
|
|
"learning_rate": 9.293654618473896e-06,
|
|
"loss": 0.2785,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 0.0752,
|
|
"grad_norm": 5.517594337463379,
|
|
"learning_rate": 9.28562248995984e-06,
|
|
"loss": 0.26,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 0.076,
|
|
"grad_norm": 8.915616989135742,
|
|
"learning_rate": 9.277670682730925e-06,
|
|
"loss": 0.2491,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 0.0768,
|
|
"grad_norm": 10.76728343963623,
|
|
"learning_rate": 9.269638554216868e-06,
|
|
"loss": 0.2721,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 0.0776,
|
|
"grad_norm": 9.26211929321289,
|
|
"learning_rate": 9.261606425702812e-06,
|
|
"loss": 0.2614,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 0.0784,
|
|
"grad_norm": 7.043396472930908,
|
|
"learning_rate": 9.253574297188756e-06,
|
|
"loss": 0.2445,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 0.0792,
|
|
"grad_norm": 6.651730537414551,
|
|
"learning_rate": 9.2455421686747e-06,
|
|
"loss": 0.2872,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"grad_norm": 19.39574432373047,
|
|
"learning_rate": 9.237510040160642e-06,
|
|
"loss": 0.2755,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 0.0808,
|
|
"grad_norm": 7.920570373535156,
|
|
"learning_rate": 9.229477911646586e-06,
|
|
"loss": 0.2596,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 0.0816,
|
|
"grad_norm": 8.022053718566895,
|
|
"learning_rate": 9.22144578313253e-06,
|
|
"loss": 0.257,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 0.0824,
|
|
"grad_norm": 11.654303550720215,
|
|
"learning_rate": 9.213413654618474e-06,
|
|
"loss": 0.257,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 0.0832,
|
|
"grad_norm": 8.477298736572266,
|
|
"learning_rate": 9.205381526104418e-06,
|
|
"loss": 0.2405,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 0.084,
|
|
"grad_norm": 6.778027534484863,
|
|
"learning_rate": 9.197349397590362e-06,
|
|
"loss": 0.2688,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 0.0848,
|
|
"grad_norm": 3.938582420349121,
|
|
"learning_rate": 9.189317269076306e-06,
|
|
"loss": 0.2512,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 0.0856,
|
|
"grad_norm": 7.830332279205322,
|
|
"learning_rate": 9.18128514056225e-06,
|
|
"loss": 0.2595,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 0.0864,
|
|
"grad_norm": 6.613556861877441,
|
|
"learning_rate": 9.173253012048194e-06,
|
|
"loss": 0.2182,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 0.0872,
|
|
"grad_norm": 7.161635398864746,
|
|
"learning_rate": 9.165220883534138e-06,
|
|
"loss": 0.2548,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 0.088,
|
|
"grad_norm": 8.175447463989258,
|
|
"learning_rate": 9.157188755020082e-06,
|
|
"loss": 0.2754,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 0.0888,
|
|
"grad_norm": 8.775857925415039,
|
|
"learning_rate": 9.149156626506026e-06,
|
|
"loss": 0.2552,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 0.0896,
|
|
"grad_norm": 8.940972328186035,
|
|
"learning_rate": 9.14112449799197e-06,
|
|
"loss": 0.247,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 0.0904,
|
|
"grad_norm": 7.314133644104004,
|
|
"learning_rate": 9.133092369477913e-06,
|
|
"loss": 0.2463,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 0.0912,
|
|
"grad_norm": 13.069714546203613,
|
|
"learning_rate": 9.125060240963857e-06,
|
|
"loss": 0.2368,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 0.092,
|
|
"grad_norm": 3.7738101482391357,
|
|
"learning_rate": 9.1170281124498e-06,
|
|
"loss": 0.2429,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 0.0928,
|
|
"grad_norm": 4.591954708099365,
|
|
"learning_rate": 9.108995983935743e-06,
|
|
"loss": 0.2465,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 0.0936,
|
|
"grad_norm": 7.4973225593566895,
|
|
"learning_rate": 9.100963855421687e-06,
|
|
"loss": 0.2526,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 0.0944,
|
|
"grad_norm": 16.275197982788086,
|
|
"learning_rate": 9.092931726907631e-06,
|
|
"loss": 0.2469,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 0.0952,
|
|
"grad_norm": 13.245892524719238,
|
|
"learning_rate": 9.084899598393575e-06,
|
|
"loss": 0.2715,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 0.096,
|
|
"grad_norm": 10.107722282409668,
|
|
"learning_rate": 9.076867469879519e-06,
|
|
"loss": 0.2288,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 0.096,
|
|
"eval_test1_cer": 0.08446299200836507,
|
|
"eval_test1_cer_norm": 0.05986985857324657,
|
|
"eval_test1_loss": 0.24487894773483276,
|
|
"eval_test1_runtime": 1174.9494,
|
|
"eval_test1_samples_per_second": 2.128,
|
|
"eval_test1_steps_per_second": 0.532,
|
|
"eval_test1_wer": 0.2207936091431237,
|
|
"eval_test1_wer_norm": 0.15590753674858995,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 0.096,
|
|
"eval_test2_cer": 0.21867417030649194,
|
|
"eval_test2_cer_norm": 0.17033041524635884,
|
|
"eval_test2_loss": 0.4188956022262573,
|
|
"eval_test2_runtime": 1303.0061,
|
|
"eval_test2_samples_per_second": 1.919,
|
|
"eval_test2_steps_per_second": 0.48,
|
|
"eval_test2_wer": 0.38798924238956284,
|
|
"eval_test2_wer_norm": 0.31910382764153106,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 0.0968,
|
|
"grad_norm": 13.199896812438965,
|
|
"learning_rate": 9.068835341365463e-06,
|
|
"loss": 0.2436,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 0.0976,
|
|
"grad_norm": 8.258294105529785,
|
|
"learning_rate": 9.060803212851407e-06,
|
|
"loss": 0.2677,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 0.0984,
|
|
"grad_norm": 6.465874671936035,
|
|
"learning_rate": 9.052771084337351e-06,
|
|
"loss": 0.2659,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 0.0992,
|
|
"grad_norm": 8.216276168823242,
|
|
"learning_rate": 9.044738955823293e-06,
|
|
"loss": 0.2553,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"grad_norm": 7.199169158935547,
|
|
"learning_rate": 9.036706827309237e-06,
|
|
"loss": 0.2244,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 0.1008,
|
|
"grad_norm": 5.689333438873291,
|
|
"learning_rate": 9.028674698795181e-06,
|
|
"loss": 0.2366,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 0.1016,
|
|
"grad_norm": 9.028944969177246,
|
|
"learning_rate": 9.020642570281125e-06,
|
|
"loss": 0.2152,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 0.1024,
|
|
"grad_norm": 6.4437127113342285,
|
|
"learning_rate": 9.012610441767069e-06,
|
|
"loss": 0.2549,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 0.1032,
|
|
"grad_norm": 9.835594177246094,
|
|
"learning_rate": 9.004578313253013e-06,
|
|
"loss": 0.2383,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 0.104,
|
|
"grad_norm": 7.06815767288208,
|
|
"learning_rate": 8.996546184738957e-06,
|
|
"loss": 0.2381,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 0.1048,
|
|
"grad_norm": 5.8962297439575195,
|
|
"learning_rate": 8.9885140562249e-06,
|
|
"loss": 0.251,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 0.1056,
|
|
"grad_norm": 8.3475980758667,
|
|
"learning_rate": 8.980481927710844e-06,
|
|
"loss": 0.2315,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 0.1064,
|
|
"grad_norm": 9.03033447265625,
|
|
"learning_rate": 8.972449799196787e-06,
|
|
"loss": 0.2229,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 0.1072,
|
|
"grad_norm": 2.628983497619629,
|
|
"learning_rate": 8.96441767068273e-06,
|
|
"loss": 0.2563,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 0.108,
|
|
"grad_norm": 12.840631484985352,
|
|
"learning_rate": 8.956385542168675e-06,
|
|
"loss": 0.2499,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 0.1088,
|
|
"grad_norm": 5.249873161315918,
|
|
"learning_rate": 8.94843373493976e-06,
|
|
"loss": 0.2406,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 0.1096,
|
|
"grad_norm": 7.155534267425537,
|
|
"learning_rate": 8.940401606425704e-06,
|
|
"loss": 0.2345,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 0.1104,
|
|
"grad_norm": 6.4776787757873535,
|
|
"learning_rate": 8.932369477911648e-06,
|
|
"loss": 0.2504,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 0.1112,
|
|
"grad_norm": 13.455100059509277,
|
|
"learning_rate": 8.924337349397592e-06,
|
|
"loss": 0.2557,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 0.112,
|
|
"grad_norm": 7.236751079559326,
|
|
"learning_rate": 8.916305220883535e-06,
|
|
"loss": 0.2355,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 0.1128,
|
|
"grad_norm": 2.6910972595214844,
|
|
"learning_rate": 8.908273092369478e-06,
|
|
"loss": 0.2406,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 0.1136,
|
|
"grad_norm": 10.298126220703125,
|
|
"learning_rate": 8.900240963855422e-06,
|
|
"loss": 0.2418,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 0.1144,
|
|
"grad_norm": 9.934380531311035,
|
|
"learning_rate": 8.892208835341366e-06,
|
|
"loss": 0.2457,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 0.1152,
|
|
"grad_norm": 5.105091571807861,
|
|
"learning_rate": 8.88417670682731e-06,
|
|
"loss": 0.2278,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 0.116,
|
|
"grad_norm": 8.721418380737305,
|
|
"learning_rate": 8.876144578313254e-06,
|
|
"loss": 0.2355,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 0.1168,
|
|
"grad_norm": 10.942416191101074,
|
|
"learning_rate": 8.868112449799198e-06,
|
|
"loss": 0.2311,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 0.1176,
|
|
"grad_norm": 5.4364705085754395,
|
|
"learning_rate": 8.860080321285142e-06,
|
|
"loss": 0.26,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 0.1184,
|
|
"grad_norm": 10.078380584716797,
|
|
"learning_rate": 8.852048192771086e-06,
|
|
"loss": 0.2596,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 0.1192,
|
|
"grad_norm": 9.630817413330078,
|
|
"learning_rate": 8.844016064257028e-06,
|
|
"loss": 0.2182,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"grad_norm": 9.832001686096191,
|
|
"learning_rate": 8.835983935742972e-06,
|
|
"loss": 0.2581,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 0.1208,
|
|
"grad_norm": 8.944684028625488,
|
|
"learning_rate": 8.827951807228916e-06,
|
|
"loss": 0.2388,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 0.1216,
|
|
"grad_norm": 8.616948127746582,
|
|
"learning_rate": 8.81991967871486e-06,
|
|
"loss": 0.2433,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 0.1224,
|
|
"grad_norm": 7.957048416137695,
|
|
"learning_rate": 8.811887550200804e-06,
|
|
"loss": 0.2338,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 0.1232,
|
|
"grad_norm": 6.853145122528076,
|
|
"learning_rate": 8.803855421686748e-06,
|
|
"loss": 0.2328,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 0.124,
|
|
"grad_norm": 5.673152923583984,
|
|
"learning_rate": 8.795823293172692e-06,
|
|
"loss": 0.2492,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 0.1248,
|
|
"grad_norm": 9.295065879821777,
|
|
"learning_rate": 8.787871485943776e-06,
|
|
"loss": 0.2342,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 0.1256,
|
|
"grad_norm": 7.0689215660095215,
|
|
"learning_rate": 8.77983935742972e-06,
|
|
"loss": 0.2481,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 0.1264,
|
|
"grad_norm": 3.174997329711914,
|
|
"learning_rate": 8.771807228915664e-06,
|
|
"loss": 0.2384,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 0.1272,
|
|
"grad_norm": 10.203186988830566,
|
|
"learning_rate": 8.763775100401608e-06,
|
|
"loss": 0.2393,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 0.128,
|
|
"grad_norm": 7.220344066619873,
|
|
"learning_rate": 8.755742971887552e-06,
|
|
"loss": 0.2696,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 0.128,
|
|
"eval_test1_cer": 0.061454365523937564,
|
|
"eval_test1_cer_norm": 0.04506927269671285,
|
|
"eval_test1_loss": 0.22977162897586823,
|
|
"eval_test1_runtime": 1134.9922,
|
|
"eval_test1_samples_per_second": 2.203,
|
|
"eval_test1_steps_per_second": 0.551,
|
|
"eval_test1_wer": 0.17863494562523688,
|
|
"eval_test1_wer_norm": 0.11663111137087583,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 0.128,
|
|
"eval_test2_cer": 0.16853791018031133,
|
|
"eval_test2_cer_norm": 0.13256313913851875,
|
|
"eval_test2_loss": 0.4023756980895996,
|
|
"eval_test2_runtime": 1236.1872,
|
|
"eval_test2_samples_per_second": 2.022,
|
|
"eval_test2_steps_per_second": 0.506,
|
|
"eval_test2_wer": 0.32633325703822386,
|
|
"eval_test2_wer_norm": 0.2594545037818015,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 0.1288,
|
|
"grad_norm": 6.053903102874756,
|
|
"learning_rate": 8.747710843373496e-06,
|
|
"loss": 0.2549,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 0.1296,
|
|
"grad_norm": 7.160613536834717,
|
|
"learning_rate": 8.73967871485944e-06,
|
|
"loss": 0.247,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 0.1304,
|
|
"grad_norm": 9.369489669799805,
|
|
"learning_rate": 8.731726907630524e-06,
|
|
"loss": 0.2391,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 0.1312,
|
|
"grad_norm": 7.7761549949646,
|
|
"learning_rate": 8.723694779116466e-06,
|
|
"loss": 0.2477,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 0.132,
|
|
"grad_norm": 15.849722862243652,
|
|
"learning_rate": 8.71566265060241e-06,
|
|
"loss": 0.2477,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 0.1328,
|
|
"grad_norm": 4.059557914733887,
|
|
"learning_rate": 8.707630522088354e-06,
|
|
"loss": 0.2358,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 0.1336,
|
|
"grad_norm": 7.216188907623291,
|
|
"learning_rate": 8.699598393574298e-06,
|
|
"loss": 0.2663,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 0.1344,
|
|
"grad_norm": 5.579075336456299,
|
|
"learning_rate": 8.691566265060242e-06,
|
|
"loss": 0.2388,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 0.1352,
|
|
"grad_norm": 7.436819553375244,
|
|
"learning_rate": 8.683534136546186e-06,
|
|
"loss": 0.2262,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 0.136,
|
|
"grad_norm": 9.426555633544922,
|
|
"learning_rate": 8.67550200803213e-06,
|
|
"loss": 0.2334,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 0.1368,
|
|
"grad_norm": 11.434511184692383,
|
|
"learning_rate": 8.667469879518073e-06,
|
|
"loss": 0.2536,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 0.1376,
|
|
"grad_norm": 9.391510963439941,
|
|
"learning_rate": 8.659437751004017e-06,
|
|
"loss": 0.2311,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 0.1384,
|
|
"grad_norm": 14.07253646850586,
|
|
"learning_rate": 8.65140562248996e-06,
|
|
"loss": 0.2272,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 0.1392,
|
|
"grad_norm": 10.38249683380127,
|
|
"learning_rate": 8.643373493975904e-06,
|
|
"loss": 0.2602,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"grad_norm": 11.22042465209961,
|
|
"learning_rate": 8.635341365461847e-06,
|
|
"loss": 0.2664,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 0.1408,
|
|
"grad_norm": 8.036049842834473,
|
|
"learning_rate": 8.627309236947791e-06,
|
|
"loss": 0.2423,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 0.1416,
|
|
"grad_norm": 8.139640808105469,
|
|
"learning_rate": 8.619277108433735e-06,
|
|
"loss": 0.2336,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 0.1424,
|
|
"grad_norm": 10.12194538116455,
|
|
"learning_rate": 8.61124497991968e-06,
|
|
"loss": 0.2329,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 0.1432,
|
|
"grad_norm": 8.494665145874023,
|
|
"learning_rate": 8.603212851405623e-06,
|
|
"loss": 0.2411,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 0.144,
|
|
"grad_norm": 7.564216136932373,
|
|
"learning_rate": 8.595180722891567e-06,
|
|
"loss": 0.2477,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 0.1448,
|
|
"grad_norm": 6.718033313751221,
|
|
"learning_rate": 8.587148594377511e-06,
|
|
"loss": 0.243,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 0.1456,
|
|
"grad_norm": 4.666207313537598,
|
|
"learning_rate": 8.579116465863455e-06,
|
|
"loss": 0.2492,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 0.1464,
|
|
"grad_norm": 9.410490036010742,
|
|
"learning_rate": 8.571084337349397e-06,
|
|
"loss": 0.2265,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 0.1472,
|
|
"grad_norm": 6.1468119621276855,
|
|
"learning_rate": 8.563052208835341e-06,
|
|
"loss": 0.2409,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 0.148,
|
|
"grad_norm": 9.485304832458496,
|
|
"learning_rate": 8.555020080321285e-06,
|
|
"loss": 0.2584,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 0.1488,
|
|
"grad_norm": 4.741575717926025,
|
|
"learning_rate": 8.546987951807229e-06,
|
|
"loss": 0.2309,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 0.1496,
|
|
"grad_norm": 7.345742225646973,
|
|
"learning_rate": 8.538955823293173e-06,
|
|
"loss": 0.233,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 0.1504,
|
|
"grad_norm": 17.133358001708984,
|
|
"learning_rate": 8.530923694779117e-06,
|
|
"loss": 0.2335,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 0.1512,
|
|
"grad_norm": 7.573137283325195,
|
|
"learning_rate": 8.52289156626506e-06,
|
|
"loss": 0.2346,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 0.152,
|
|
"grad_norm": 11.430822372436523,
|
|
"learning_rate": 8.514859437751005e-06,
|
|
"loss": 0.2435,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 0.1528,
|
|
"grad_norm": 7.896162986755371,
|
|
"learning_rate": 8.506827309236948e-06,
|
|
"loss": 0.239,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 0.1536,
|
|
"grad_norm": 7.543678283691406,
|
|
"learning_rate": 8.498795180722892e-06,
|
|
"loss": 0.2509,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 0.1544,
|
|
"grad_norm": 11.438505172729492,
|
|
"learning_rate": 8.490763052208836e-06,
|
|
"loss": 0.2244,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 0.1552,
|
|
"grad_norm": 5.8480730056762695,
|
|
"learning_rate": 8.48281124497992e-06,
|
|
"loss": 0.2428,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 0.156,
|
|
"grad_norm": 6.436691761016846,
|
|
"learning_rate": 8.474779116465865e-06,
|
|
"loss": 0.2367,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 0.1568,
|
|
"grad_norm": 7.197071075439453,
|
|
"learning_rate": 8.466746987951808e-06,
|
|
"loss": 0.2162,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 0.1576,
|
|
"grad_norm": 4.8729634284973145,
|
|
"learning_rate": 8.458714859437752e-06,
|
|
"loss": 0.2231,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 0.1584,
|
|
"grad_norm": 7.567530155181885,
|
|
"learning_rate": 8.450682730923695e-06,
|
|
"loss": 0.2208,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 0.1592,
|
|
"grad_norm": 6.356396675109863,
|
|
"learning_rate": 8.442650602409639e-06,
|
|
"loss": 0.2217,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"grad_norm": 11.299875259399414,
|
|
"learning_rate": 8.434618473895582e-06,
|
|
"loss": 0.2376,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_test1_cer": 0.0942891552767197,
|
|
"eval_test1_cer_norm": 0.06996422311330948,
|
|
"eval_test1_loss": 0.22547538578510284,
|
|
"eval_test1_runtime": 1189.7817,
|
|
"eval_test1_samples_per_second": 2.101,
|
|
"eval_test1_steps_per_second": 0.525,
|
|
"eval_test1_wer": 0.22478789469080732,
|
|
"eval_test1_wer_norm": 0.16227826646015373,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_test2_cer": 0.17272370926195543,
|
|
"eval_test2_cer_norm": 0.13756488224356989,
|
|
"eval_test2_loss": 0.3925027847290039,
|
|
"eval_test2_runtime": 1241.7968,
|
|
"eval_test2_samples_per_second": 2.013,
|
|
"eval_test2_steps_per_second": 0.503,
|
|
"eval_test2_wer": 0.3331712062256809,
|
|
"eval_test2_wer_norm": 0.26449690579876234,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 0.1608,
|
|
"grad_norm": 3.2471466064453125,
|
|
"learning_rate": 8.426586345381526e-06,
|
|
"loss": 0.2182,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 0.1616,
|
|
"grad_norm": 8.284675598144531,
|
|
"learning_rate": 8.41855421686747e-06,
|
|
"loss": 0.2382,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 0.1624,
|
|
"grad_norm": 5.969610214233398,
|
|
"learning_rate": 8.410522088353414e-06,
|
|
"loss": 0.2372,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 0.1632,
|
|
"grad_norm": 12.841833114624023,
|
|
"learning_rate": 8.402489959839358e-06,
|
|
"loss": 0.2389,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 0.164,
|
|
"grad_norm": 6.656698703765869,
|
|
"learning_rate": 8.394457831325302e-06,
|
|
"loss": 0.2368,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 0.1648,
|
|
"grad_norm": 5.6654276847839355,
|
|
"learning_rate": 8.386425702811246e-06,
|
|
"loss": 0.2122,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 0.1656,
|
|
"grad_norm": 4.837275505065918,
|
|
"learning_rate": 8.378393574297188e-06,
|
|
"loss": 0.2088,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 0.1664,
|
|
"grad_norm": 7.151382923126221,
|
|
"learning_rate": 8.370361445783132e-06,
|
|
"loss": 0.2328,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 0.1672,
|
|
"grad_norm": 10.351961135864258,
|
|
"learning_rate": 8.362329317269076e-06,
|
|
"loss": 0.2217,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 0.168,
|
|
"grad_norm": 4.848665714263916,
|
|
"learning_rate": 8.35429718875502e-06,
|
|
"loss": 0.2372,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 0.1688,
|
|
"grad_norm": 4.431462287902832,
|
|
"learning_rate": 8.346265060240964e-06,
|
|
"loss": 0.2193,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 0.1696,
|
|
"grad_norm": 4.42501163482666,
|
|
"learning_rate": 8.338232931726908e-06,
|
|
"loss": 0.2198,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 0.1704,
|
|
"grad_norm": 11.573799133300781,
|
|
"learning_rate": 8.330200803212852e-06,
|
|
"loss": 0.2454,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 0.1712,
|
|
"grad_norm": 5.41486120223999,
|
|
"learning_rate": 8.322168674698796e-06,
|
|
"loss": 0.2328,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 0.172,
|
|
"grad_norm": 5.6592488288879395,
|
|
"learning_rate": 8.31413654618474e-06,
|
|
"loss": 0.2388,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 0.1728,
|
|
"grad_norm": 5.244436264038086,
|
|
"learning_rate": 8.306104417670683e-06,
|
|
"loss": 0.2384,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 0.1736,
|
|
"grad_norm": 4.57738733291626,
|
|
"learning_rate": 8.298072289156627e-06,
|
|
"loss": 0.2227,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 0.1744,
|
|
"grad_norm": 11.43252944946289,
|
|
"learning_rate": 8.290040160642571e-06,
|
|
"loss": 0.2265,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 0.1752,
|
|
"grad_norm": 4.118782997131348,
|
|
"learning_rate": 8.282008032128515e-06,
|
|
"loss": 0.2211,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 0.176,
|
|
"grad_norm": 7.751514434814453,
|
|
"learning_rate": 8.273975903614459e-06,
|
|
"loss": 0.2359,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 0.1768,
|
|
"grad_norm": 11.36629867553711,
|
|
"learning_rate": 8.265943775100403e-06,
|
|
"loss": 0.234,
|
|
"step": 22100
|
|
},
|
|
{
|
|
"epoch": 0.1776,
|
|
"grad_norm": 4.669189453125,
|
|
"learning_rate": 8.257911646586347e-06,
|
|
"loss": 0.222,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 0.1784,
|
|
"grad_norm": 8.595556259155273,
|
|
"learning_rate": 8.249879518072291e-06,
|
|
"loss": 0.2188,
|
|
"step": 22300
|
|
},
|
|
{
|
|
"epoch": 0.1792,
|
|
"grad_norm": 9.70352840423584,
|
|
"learning_rate": 8.241847389558235e-06,
|
|
"loss": 0.2289,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"grad_norm": 6.345508575439453,
|
|
"learning_rate": 8.233815261044177e-06,
|
|
"loss": 0.222,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 0.1808,
|
|
"grad_norm": 4.9803643226623535,
|
|
"learning_rate": 8.225783132530121e-06,
|
|
"loss": 0.2338,
|
|
"step": 22600
|
|
},
|
|
{
|
|
"epoch": 0.1816,
|
|
"grad_norm": 9.151689529418945,
|
|
"learning_rate": 8.217751004016065e-06,
|
|
"loss": 0.2475,
|
|
"step": 22700
|
|
},
|
|
{
|
|
"epoch": 0.1824,
|
|
"grad_norm": 7.338719844818115,
|
|
"learning_rate": 8.209718875502009e-06,
|
|
"loss": 0.2194,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 0.1832,
|
|
"grad_norm": 15.189725875854492,
|
|
"learning_rate": 8.201686746987953e-06,
|
|
"loss": 0.2406,
|
|
"step": 22900
|
|
},
|
|
{
|
|
"epoch": 0.184,
|
|
"grad_norm": 6.638333320617676,
|
|
"learning_rate": 8.193654618473897e-06,
|
|
"loss": 0.2365,
|
|
"step": 23000
|
|
},
|
|
{
|
|
"epoch": 0.1848,
|
|
"grad_norm": 11.742992401123047,
|
|
"learning_rate": 8.18562248995984e-06,
|
|
"loss": 0.2187,
|
|
"step": 23100
|
|
},
|
|
{
|
|
"epoch": 0.1856,
|
|
"grad_norm": 13.45174789428711,
|
|
"learning_rate": 8.177590361445784e-06,
|
|
"loss": 0.215,
|
|
"step": 23200
|
|
},
|
|
{
|
|
"epoch": 0.1864,
|
|
"grad_norm": 4.851787090301514,
|
|
"learning_rate": 8.169558232931728e-06,
|
|
"loss": 0.2547,
|
|
"step": 23300
|
|
},
|
|
{
|
|
"epoch": 0.1872,
|
|
"grad_norm": 15.86487865447998,
|
|
"learning_rate": 8.161526104417672e-06,
|
|
"loss": 0.205,
|
|
"step": 23400
|
|
},
|
|
{
|
|
"epoch": 0.188,
|
|
"grad_norm": 7.100121974945068,
|
|
"learning_rate": 8.153574297188755e-06,
|
|
"loss": 0.2496,
|
|
"step": 23500
|
|
},
|
|
{
|
|
"epoch": 0.1888,
|
|
"grad_norm": 4.416376113891602,
|
|
"learning_rate": 8.145542168674699e-06,
|
|
"loss": 0.2205,
|
|
"step": 23600
|
|
},
|
|
{
|
|
"epoch": 0.1896,
|
|
"grad_norm": 8.244140625,
|
|
"learning_rate": 8.137510040160643e-06,
|
|
"loss": 0.216,
|
|
"step": 23700
|
|
},
|
|
{
|
|
"epoch": 0.1904,
|
|
"grad_norm": 7.894223690032959,
|
|
"learning_rate": 8.129477911646587e-06,
|
|
"loss": 0.2482,
|
|
"step": 23800
|
|
},
|
|
{
|
|
"epoch": 0.1912,
|
|
"grad_norm": 7.517197608947754,
|
|
"learning_rate": 8.121526104417673e-06,
|
|
"loss": 0.2349,
|
|
"step": 23900
|
|
},
|
|
{
|
|
"epoch": 0.192,
|
|
"grad_norm": 18.533597946166992,
|
|
"learning_rate": 8.113493975903615e-06,
|
|
"loss": 0.2205,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 0.192,
|
|
"eval_test1_cer": 0.08556464261707372,
|
|
"eval_test1_cer_norm": 0.06268398684178933,
|
|
"eval_test1_loss": 0.22241076827049255,
|
|
"eval_test1_runtime": 1196.1164,
|
|
"eval_test1_samples_per_second": 2.09,
|
|
"eval_test1_steps_per_second": 0.523,
|
|
"eval_test1_wer": 0.21058923000670574,
|
|
"eval_test1_wer_norm": 0.14939068938309127,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 0.192,
|
|
"eval_test2_cer": 0.13958729980960913,
|
|
"eval_test2_cer_norm": 0.1119799736597459,
|
|
"eval_test2_loss": 0.3912878632545471,
|
|
"eval_test2_runtime": 1198.2977,
|
|
"eval_test2_samples_per_second": 2.086,
|
|
"eval_test2_steps_per_second": 0.522,
|
|
"eval_test2_wer": 0.28147173266193637,
|
|
"eval_test2_wer_norm": 0.21352853541141417,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 0.1928,
|
|
"grad_norm": 7.209935665130615,
|
|
"learning_rate": 8.1055421686747e-06,
|
|
"loss": 0.2215,
|
|
"step": 24100
|
|
},
|
|
{
|
|
"epoch": 0.1936,
|
|
"grad_norm": 8.64651107788086,
|
|
"learning_rate": 8.097590361445784e-06,
|
|
"loss": 0.2163,
|
|
"step": 24200
|
|
},
|
|
{
|
|
"epoch": 0.1944,
|
|
"grad_norm": 11.35953426361084,
|
|
"learning_rate": 8.089558232931727e-06,
|
|
"loss": 0.2356,
|
|
"step": 24300
|
|
},
|
|
{
|
|
"epoch": 0.1952,
|
|
"grad_norm": 10.773683547973633,
|
|
"learning_rate": 8.081526104417671e-06,
|
|
"loss": 0.2359,
|
|
"step": 24400
|
|
},
|
|
{
|
|
"epoch": 0.196,
|
|
"grad_norm": 7.809534072875977,
|
|
"learning_rate": 8.073493975903615e-06,
|
|
"loss": 0.2224,
|
|
"step": 24500
|
|
},
|
|
{
|
|
"epoch": 0.1968,
|
|
"grad_norm": 6.901273727416992,
|
|
"learning_rate": 8.06546184738956e-06,
|
|
"loss": 0.2193,
|
|
"step": 24600
|
|
},
|
|
{
|
|
"epoch": 0.1976,
|
|
"grad_norm": 10.301507949829102,
|
|
"learning_rate": 8.057429718875503e-06,
|
|
"loss": 0.238,
|
|
"step": 24700
|
|
},
|
|
{
|
|
"epoch": 0.1984,
|
|
"grad_norm": 6.89518404006958,
|
|
"learning_rate": 8.049397590361447e-06,
|
|
"loss": 0.2358,
|
|
"step": 24800
|
|
},
|
|
{
|
|
"epoch": 0.1992,
|
|
"grad_norm": 3.8990767002105713,
|
|
"learning_rate": 8.041365461847391e-06,
|
|
"loss": 0.2192,
|
|
"step": 24900
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"grad_norm": 6.363491058349609,
|
|
"learning_rate": 8.033333333333335e-06,
|
|
"loss": 0.2334,
|
|
"step": 25000
|
|
},
|
|
{
|
|
"epoch": 0.2008,
|
|
"grad_norm": 21.063215255737305,
|
|
"learning_rate": 8.025301204819279e-06,
|
|
"loss": 0.236,
|
|
"step": 25100
|
|
},
|
|
{
|
|
"epoch": 0.2016,
|
|
"grad_norm": 7.973589897155762,
|
|
"learning_rate": 8.017269076305221e-06,
|
|
"loss": 0.237,
|
|
"step": 25200
|
|
},
|
|
{
|
|
"epoch": 0.2024,
|
|
"grad_norm": 8.546441078186035,
|
|
"learning_rate": 8.009236947791165e-06,
|
|
"loss": 0.2591,
|
|
"step": 25300
|
|
},
|
|
{
|
|
"epoch": 0.2032,
|
|
"grad_norm": 14.01245403289795,
|
|
"learning_rate": 8.001204819277109e-06,
|
|
"loss": 0.2364,
|
|
"step": 25400
|
|
},
|
|
{
|
|
"epoch": 0.204,
|
|
"grad_norm": 3.486008405685425,
|
|
"learning_rate": 7.993172690763053e-06,
|
|
"loss": 0.2349,
|
|
"step": 25500
|
|
},
|
|
{
|
|
"epoch": 0.2048,
|
|
"grad_norm": 8.98222827911377,
|
|
"learning_rate": 7.985140562248997e-06,
|
|
"loss": 0.2138,
|
|
"step": 25600
|
|
},
|
|
{
|
|
"epoch": 0.2056,
|
|
"grad_norm": 10.626484870910645,
|
|
"learning_rate": 7.97710843373494e-06,
|
|
"loss": 0.2229,
|
|
"step": 25700
|
|
},
|
|
{
|
|
"epoch": 0.2064,
|
|
"grad_norm": 6.444272041320801,
|
|
"learning_rate": 7.969076305220885e-06,
|
|
"loss": 0.2145,
|
|
"step": 25800
|
|
},
|
|
{
|
|
"epoch": 0.2072,
|
|
"grad_norm": 6.722916603088379,
|
|
"learning_rate": 7.961044176706828e-06,
|
|
"loss": 0.2189,
|
|
"step": 25900
|
|
},
|
|
{
|
|
"epoch": 0.208,
|
|
"grad_norm": 10.45226001739502,
|
|
"learning_rate": 7.953012048192772e-06,
|
|
"loss": 0.214,
|
|
"step": 26000
|
|
},
|
|
{
|
|
"epoch": 0.2088,
|
|
"grad_norm": 7.136618614196777,
|
|
"learning_rate": 7.944979919678716e-06,
|
|
"loss": 0.2147,
|
|
"step": 26100
|
|
},
|
|
{
|
|
"epoch": 0.2096,
|
|
"grad_norm": 5.5284953117370605,
|
|
"learning_rate": 7.937028112449799e-06,
|
|
"loss": 0.2236,
|
|
"step": 26200
|
|
},
|
|
{
|
|
"epoch": 0.2104,
|
|
"grad_norm": 7.089993000030518,
|
|
"learning_rate": 7.928995983935743e-06,
|
|
"loss": 0.2083,
|
|
"step": 26300
|
|
},
|
|
{
|
|
"epoch": 0.2112,
|
|
"grad_norm": 9.043608665466309,
|
|
"learning_rate": 7.920963855421687e-06,
|
|
"loss": 0.2478,
|
|
"step": 26400
|
|
},
|
|
{
|
|
"epoch": 0.212,
|
|
"grad_norm": 5.048727989196777,
|
|
"learning_rate": 7.91293172690763e-06,
|
|
"loss": 0.2063,
|
|
"step": 26500
|
|
},
|
|
{
|
|
"epoch": 0.2128,
|
|
"grad_norm": 9.47496509552002,
|
|
"learning_rate": 7.904899598393575e-06,
|
|
"loss": 0.2401,
|
|
"step": 26600
|
|
},
|
|
{
|
|
"epoch": 0.2136,
|
|
"grad_norm": 5.263045787811279,
|
|
"learning_rate": 7.896867469879519e-06,
|
|
"loss": 0.2373,
|
|
"step": 26700
|
|
},
|
|
{
|
|
"epoch": 0.2144,
|
|
"grad_norm": 10.800714492797852,
|
|
"learning_rate": 7.888835341365462e-06,
|
|
"loss": 0.2123,
|
|
"step": 26800
|
|
},
|
|
{
|
|
"epoch": 0.2152,
|
|
"grad_norm": 5.966376304626465,
|
|
"learning_rate": 7.880803212851406e-06,
|
|
"loss": 0.2335,
|
|
"step": 26900
|
|
},
|
|
{
|
|
"epoch": 0.216,
|
|
"grad_norm": 4.061577320098877,
|
|
"learning_rate": 7.87277108433735e-06,
|
|
"loss": 0.2183,
|
|
"step": 27000
|
|
},
|
|
{
|
|
"epoch": 0.2168,
|
|
"grad_norm": 10.104215621948242,
|
|
"learning_rate": 7.864738955823293e-06,
|
|
"loss": 0.1969,
|
|
"step": 27100
|
|
},
|
|
{
|
|
"epoch": 0.2176,
|
|
"grad_norm": 6.445654392242432,
|
|
"learning_rate": 7.856706827309236e-06,
|
|
"loss": 0.2144,
|
|
"step": 27200
|
|
},
|
|
{
|
|
"epoch": 0.2184,
|
|
"grad_norm": 9.509758949279785,
|
|
"learning_rate": 7.84867469879518e-06,
|
|
"loss": 0.2022,
|
|
"step": 27300
|
|
},
|
|
{
|
|
"epoch": 0.2192,
|
|
"grad_norm": 11.04575252532959,
|
|
"learning_rate": 7.840642570281124e-06,
|
|
"loss": 0.2153,
|
|
"step": 27400
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"grad_norm": 10.30285358428955,
|
|
"learning_rate": 7.832610441767068e-06,
|
|
"loss": 0.2269,
|
|
"step": 27500
|
|
},
|
|
{
|
|
"epoch": 0.2208,
|
|
"grad_norm": 9.692445755004883,
|
|
"learning_rate": 7.824578313253012e-06,
|
|
"loss": 0.2372,
|
|
"step": 27600
|
|
},
|
|
{
|
|
"epoch": 0.2216,
|
|
"grad_norm": 7.303224563598633,
|
|
"learning_rate": 7.816546184738956e-06,
|
|
"loss": 0.2229,
|
|
"step": 27700
|
|
},
|
|
{
|
|
"epoch": 0.2224,
|
|
"grad_norm": 7.364916801452637,
|
|
"learning_rate": 7.8085140562249e-06,
|
|
"loss": 0.2312,
|
|
"step": 27800
|
|
},
|
|
{
|
|
"epoch": 0.2232,
|
|
"grad_norm": 8.000985145568848,
|
|
"learning_rate": 7.800481927710844e-06,
|
|
"loss": 0.2146,
|
|
"step": 27900
|
|
},
|
|
{
|
|
"epoch": 0.224,
|
|
"grad_norm": 7.1691131591796875,
|
|
"learning_rate": 7.792449799196788e-06,
|
|
"loss": 0.2328,
|
|
"step": 28000
|
|
},
|
|
{
|
|
"epoch": 0.224,
|
|
"eval_test1_cer": 0.06472664127268653,
|
|
"eval_test1_cer_norm": 0.04759046269839364,
|
|
"eval_test1_loss": 0.21469315886497498,
|
|
"eval_test1_runtime": 1210.6322,
|
|
"eval_test1_samples_per_second": 2.065,
|
|
"eval_test1_steps_per_second": 0.516,
|
|
"eval_test1_wer": 0.18274585264876528,
|
|
"eval_test1_wer_norm": 0.12037172331161052,
|
|
"step": 28000
|
|
},
|
|
{
|
|
"epoch": 0.224,
|
|
"eval_test2_cer": 0.17353100384514877,
|
|
"eval_test2_cer_norm": 0.13871242640223116,
|
|
"eval_test2_loss": 0.37395521998405457,
|
|
"eval_test2_runtime": 1313.1792,
|
|
"eval_test2_samples_per_second": 1.904,
|
|
"eval_test2_steps_per_second": 0.476,
|
|
"eval_test2_wer": 0.35056649118791483,
|
|
"eval_test2_wer_norm": 0.2835205134082054,
|
|
"step": 28000
|
|
},
|
|
{
|
|
"epoch": 0.2248,
|
|
"grad_norm": 5.209496021270752,
|
|
"learning_rate": 7.784417670682732e-06,
|
|
"loss": 0.2212,
|
|
"step": 28100
|
|
},
|
|
{
|
|
"epoch": 0.2256,
|
|
"grad_norm": 6.241885185241699,
|
|
"learning_rate": 7.776465863453816e-06,
|
|
"loss": 0.2175,
|
|
"step": 28200
|
|
},
|
|
{
|
|
"epoch": 0.2264,
|
|
"grad_norm": 6.768528461456299,
|
|
"learning_rate": 7.76843373493976e-06,
|
|
"loss": 0.2111,
|
|
"step": 28300
|
|
},
|
|
{
|
|
"epoch": 0.2272,
|
|
"grad_norm": 16.620553970336914,
|
|
"learning_rate": 7.760401606425704e-06,
|
|
"loss": 0.2201,
|
|
"step": 28400
|
|
},
|
|
{
|
|
"epoch": 0.228,
|
|
"grad_norm": 5.785097122192383,
|
|
"learning_rate": 7.752369477911648e-06,
|
|
"loss": 0.2195,
|
|
"step": 28500
|
|
},
|
|
{
|
|
"epoch": 0.2288,
|
|
"grad_norm": 7.3884663581848145,
|
|
"learning_rate": 7.744337349397592e-06,
|
|
"loss": 0.2107,
|
|
"step": 28600
|
|
},
|
|
{
|
|
"epoch": 0.2296,
|
|
"grad_norm": 6.965432643890381,
|
|
"learning_rate": 7.736305220883534e-06,
|
|
"loss": 0.247,
|
|
"step": 28700
|
|
},
|
|
{
|
|
"epoch": 0.2304,
|
|
"grad_norm": 7.899501323699951,
|
|
"learning_rate": 7.728273092369478e-06,
|
|
"loss": 0.2315,
|
|
"step": 28800
|
|
},
|
|
{
|
|
"epoch": 0.2312,
|
|
"grad_norm": 8.336009979248047,
|
|
"learning_rate": 7.720240963855422e-06,
|
|
"loss": 0.2262,
|
|
"step": 28900
|
|
},
|
|
{
|
|
"epoch": 0.232,
|
|
"grad_norm": 7.90683126449585,
|
|
"learning_rate": 7.712208835341366e-06,
|
|
"loss": 0.2322,
|
|
"step": 29000
|
|
},
|
|
{
|
|
"epoch": 0.2328,
|
|
"grad_norm": 6.586302757263184,
|
|
"learning_rate": 7.70417670682731e-06,
|
|
"loss": 0.2341,
|
|
"step": 29100
|
|
},
|
|
{
|
|
"epoch": 0.2336,
|
|
"grad_norm": 6.594479084014893,
|
|
"learning_rate": 7.696144578313254e-06,
|
|
"loss": 0.2226,
|
|
"step": 29200
|
|
},
|
|
{
|
|
"epoch": 0.2344,
|
|
"grad_norm": 7.850062370300293,
|
|
"learning_rate": 7.688112449799197e-06,
|
|
"loss": 0.2094,
|
|
"step": 29300
|
|
},
|
|
{
|
|
"epoch": 0.2352,
|
|
"grad_norm": 12.644471168518066,
|
|
"learning_rate": 7.680080321285141e-06,
|
|
"loss": 0.226,
|
|
"step": 29400
|
|
},
|
|
{
|
|
"epoch": 0.236,
|
|
"grad_norm": 7.289064884185791,
|
|
"learning_rate": 7.672048192771085e-06,
|
|
"loss": 0.2214,
|
|
"step": 29500
|
|
},
|
|
{
|
|
"epoch": 0.2368,
|
|
"grad_norm": 10.635286331176758,
|
|
"learning_rate": 7.664016064257028e-06,
|
|
"loss": 0.2142,
|
|
"step": 29600
|
|
},
|
|
{
|
|
"epoch": 0.2376,
|
|
"grad_norm": 5.214193344116211,
|
|
"learning_rate": 7.655983935742971e-06,
|
|
"loss": 0.2218,
|
|
"step": 29700
|
|
},
|
|
{
|
|
"epoch": 0.2384,
|
|
"grad_norm": 6.800753593444824,
|
|
"learning_rate": 7.647951807228915e-06,
|
|
"loss": 0.2179,
|
|
"step": 29800
|
|
},
|
|
{
|
|
"epoch": 0.2392,
|
|
"grad_norm": 8.79254150390625,
|
|
"learning_rate": 7.63991967871486e-06,
|
|
"loss": 0.2066,
|
|
"step": 29900
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"grad_norm": 7.172235488891602,
|
|
"learning_rate": 7.631887550200803e-06,
|
|
"loss": 0.2207,
|
|
"step": 30000
|
|
},
|
|
{
|
|
"epoch": 0.2408,
|
|
"grad_norm": 5.066342830657959,
|
|
"learning_rate": 7.623855421686748e-06,
|
|
"loss": 0.2034,
|
|
"step": 30100
|
|
},
|
|
{
|
|
"epoch": 0.2416,
|
|
"grad_norm": 5.975930690765381,
|
|
"learning_rate": 7.615903614457832e-06,
|
|
"loss": 0.2136,
|
|
"step": 30200
|
|
},
|
|
{
|
|
"epoch": 0.2424,
|
|
"grad_norm": 5.854097366333008,
|
|
"learning_rate": 7.607871485943775e-06,
|
|
"loss": 0.2174,
|
|
"step": 30300
|
|
},
|
|
{
|
|
"epoch": 0.2432,
|
|
"grad_norm": 6.811086177825928,
|
|
"learning_rate": 7.599839357429719e-06,
|
|
"loss": 0.2232,
|
|
"step": 30400
|
|
},
|
|
{
|
|
"epoch": 0.244,
|
|
"grad_norm": 7.538053035736084,
|
|
"learning_rate": 7.591807228915663e-06,
|
|
"loss": 0.2003,
|
|
"step": 30500
|
|
},
|
|
{
|
|
"epoch": 0.2448,
|
|
"grad_norm": 5.63380241394043,
|
|
"learning_rate": 7.583775100401607e-06,
|
|
"loss": 0.22,
|
|
"step": 30600
|
|
},
|
|
{
|
|
"epoch": 0.2456,
|
|
"grad_norm": 7.308541297912598,
|
|
"learning_rate": 7.575742971887551e-06,
|
|
"loss": 0.194,
|
|
"step": 30700
|
|
},
|
|
{
|
|
"epoch": 0.2464,
|
|
"grad_norm": 12.48105239868164,
|
|
"learning_rate": 7.567710843373495e-06,
|
|
"loss": 0.2254,
|
|
"step": 30800
|
|
},
|
|
{
|
|
"epoch": 0.2472,
|
|
"grad_norm": 6.443123817443848,
|
|
"learning_rate": 7.559678714859439e-06,
|
|
"loss": 0.2157,
|
|
"step": 30900
|
|
},
|
|
{
|
|
"epoch": 0.248,
|
|
"grad_norm": 5.197587013244629,
|
|
"learning_rate": 7.551646586345383e-06,
|
|
"loss": 0.2095,
|
|
"step": 31000
|
|
},
|
|
{
|
|
"epoch": 0.2488,
|
|
"grad_norm": 5.799818515777588,
|
|
"learning_rate": 7.543614457831327e-06,
|
|
"loss": 0.2432,
|
|
"step": 31100
|
|
},
|
|
{
|
|
"epoch": 0.2496,
|
|
"grad_norm": 6.902918338775635,
|
|
"learning_rate": 7.535582329317269e-06,
|
|
"loss": 0.2106,
|
|
"step": 31200
|
|
},
|
|
{
|
|
"epoch": 0.2504,
|
|
"grad_norm": 12.562540054321289,
|
|
"learning_rate": 7.527550200803213e-06,
|
|
"loss": 0.2241,
|
|
"step": 31300
|
|
},
|
|
{
|
|
"epoch": 0.2512,
|
|
"grad_norm": 6.103118419647217,
|
|
"learning_rate": 7.519518072289157e-06,
|
|
"loss": 0.2141,
|
|
"step": 31400
|
|
},
|
|
{
|
|
"epoch": 0.252,
|
|
"grad_norm": 3.2933740615844727,
|
|
"learning_rate": 7.511485943775101e-06,
|
|
"loss": 0.2079,
|
|
"step": 31500
|
|
},
|
|
{
|
|
"epoch": 0.2528,
|
|
"grad_norm": 4.528769493103027,
|
|
"learning_rate": 7.503453815261045e-06,
|
|
"loss": 0.2072,
|
|
"step": 31600
|
|
},
|
|
{
|
|
"epoch": 0.2536,
|
|
"grad_norm": 5.217092990875244,
|
|
"learning_rate": 7.4954216867469886e-06,
|
|
"loss": 0.2165,
|
|
"step": 31700
|
|
},
|
|
{
|
|
"epoch": 0.2544,
|
|
"grad_norm": 6.123150825500488,
|
|
"learning_rate": 7.4873895582329325e-06,
|
|
"loss": 0.1907,
|
|
"step": 31800
|
|
},
|
|
{
|
|
"epoch": 0.2552,
|
|
"grad_norm": 4.618309020996094,
|
|
"learning_rate": 7.479357429718876e-06,
|
|
"loss": 0.2214,
|
|
"step": 31900
|
|
},
|
|
{
|
|
"epoch": 0.256,
|
|
"grad_norm": 8.602248191833496,
|
|
"learning_rate": 7.47132530120482e-06,
|
|
"loss": 0.2075,
|
|
"step": 32000
|
|
},
|
|
{
|
|
"epoch": 0.256,
|
|
"eval_test1_cer": 0.06202853088356113,
|
|
"eval_test1_cer_norm": 0.046020121497346744,
|
|
"eval_test1_loss": 0.21270166337490082,
|
|
"eval_test1_runtime": 1209.2653,
|
|
"eval_test1_samples_per_second": 2.067,
|
|
"eval_test1_steps_per_second": 0.517,
|
|
"eval_test1_wer": 0.18376629056240706,
|
|
"eval_test1_wer_norm": 0.12262193518220871,
|
|
"step": 32000
|
|
},
|
|
{
|
|
"epoch": 0.256,
|
|
"eval_test2_cer": 0.12037555530667861,
|
|
"eval_test2_cer_norm": 0.09361442516268981,
|
|
"eval_test2_loss": 0.3685109317302704,
|
|
"eval_test2_runtime": 1222.5262,
|
|
"eval_test2_samples_per_second": 2.045,
|
|
"eval_test2_steps_per_second": 0.511,
|
|
"eval_test2_wer": 0.2525177386129549,
|
|
"eval_test2_wer_norm": 0.18381847352738942,
|
|
"step": 32000
|
|
},
{ "epoch": 0.2568, "grad_norm": 4.757772922515869, "learning_rate": 7.463293172690763e-06, "loss": 0.2147, "step": 32100 },
{ "epoch": 0.2576, "grad_norm": 2.9504878520965576, "learning_rate": 7.455261044176707e-06, "loss": 0.225, "step": 32200 },
{ "epoch": 0.2584, "grad_norm": 6.651636600494385, "learning_rate": 7.447228915662651e-06, "loss": 0.2233, "step": 32300 },
{ "epoch": 0.2592, "grad_norm": 6.254730224609375, "learning_rate": 7.4392771084337355e-06, "loss": 0.2067, "step": 32400 },
{ "epoch": 0.26, "grad_norm": 8.979650497436523, "learning_rate": 7.4312449799196795e-06, "loss": 0.2281, "step": 32500 },
{ "epoch": 0.2608, "grad_norm": 4.179977893829346, "learning_rate": 7.423212851405623e-06, "loss": 0.1986, "step": 32600 },
{ "epoch": 0.2616, "grad_norm": 5.462186336517334, "learning_rate": 7.415180722891567e-06, "loss": 0.2104, "step": 32700 },
{ "epoch": 0.2624, "grad_norm": 7.806402683258057, "learning_rate": 7.40714859437751e-06, "loss": 0.2153, "step": 32800 },
{ "epoch": 0.2632, "grad_norm": 5.667836666107178, "learning_rate": 7.399116465863454e-06, "loss": 0.1883, "step": 32900 },
{ "epoch": 0.264, "grad_norm": 8.295492172241211, "learning_rate": 7.391084337349398e-06, "loss": 0.2139, "step": 33000 },
{ "epoch": 0.2648, "grad_norm": 10.960628509521484, "learning_rate": 7.383052208835342e-06, "loss": 0.2415, "step": 33100 },
{ "epoch": 0.2656, "grad_norm": 8.189926147460938, "learning_rate": 7.375020080321286e-06, "loss": 0.2168, "step": 33200 },
{ "epoch": 0.2664, "grad_norm": 8.110308647155762, "learning_rate": 7.36698795180723e-06, "loss": 0.2169, "step": 33300 },
{ "epoch": 0.2672, "grad_norm": 7.833503723144531, "learning_rate": 7.358955823293174e-06, "loss": 0.2131, "step": 33400 },
{ "epoch": 0.268, "grad_norm": 4.76417350769043, "learning_rate": 7.350923694779117e-06, "loss": 0.2093, "step": 33500 },
{ "epoch": 0.2688, "grad_norm": 6.401252269744873, "learning_rate": 7.342891566265061e-06, "loss": 0.2128, "step": 33600 },
{ "epoch": 0.2696, "grad_norm": 4.773887634277344, "learning_rate": 7.334859437751004e-06, "loss": 0.2042, "step": 33700 },
{ "epoch": 0.2704, "grad_norm": 3.329989433288574, "learning_rate": 7.326827309236948e-06, "loss": 0.1916, "step": 33800 },
{ "epoch": 0.2712, "grad_norm": 5.542182445526123, "learning_rate": 7.318795180722892e-06, "loss": 0.2039, "step": 33900 },
{ "epoch": 0.272, "grad_norm": 10.034430503845215, "learning_rate": 7.310763052208836e-06, "loss": 0.207, "step": 34000 },
{ "epoch": 0.2728, "grad_norm": 6.576022624969482, "learning_rate": 7.30273092369478e-06, "loss": 0.2136, "step": 34100 },
{ "epoch": 0.2736, "grad_norm": 5.687163829803467, "learning_rate": 7.2946987951807236e-06, "loss": 0.1901, "step": 34200 },
{ "epoch": 0.2744, "grad_norm": 7.63690710067749, "learning_rate": 7.2866666666666675e-06, "loss": 0.2094, "step": 34300 },
{ "epoch": 0.2752, "grad_norm": 17.66728401184082, "learning_rate": 7.278714859437752e-06, "loss": 0.2149, "step": 34400 },
{ "epoch": 0.276, "grad_norm": 6.838101863861084, "learning_rate": 7.270682730923695e-06, "loss": 0.2181, "step": 34500 },
{ "epoch": 0.2768, "grad_norm": 9.850558280944824, "learning_rate": 7.262650602409639e-06, "loss": 0.1968, "step": 34600 },
{ "epoch": 0.2776, "grad_norm": 5.048129558563232, "learning_rate": 7.254618473895583e-06, "loss": 0.2264, "step": 34700 },
{ "epoch": 0.2784, "grad_norm": 14.177302360534668, "learning_rate": 7.246586345381527e-06, "loss": 0.2079, "step": 34800 },
{ "epoch": 0.2792, "grad_norm": 6.149969577789307, "learning_rate": 7.2385542168674706e-06, "loss": 0.2301, "step": 34900 },
{ "epoch": 0.28, "grad_norm": 5.608641624450684, "learning_rate": 7.2305220883534145e-06, "loss": 0.2208, "step": 35000 },
{ "epoch": 0.2808, "grad_norm": 6.874965667724609, "learning_rate": 7.222489959839358e-06, "loss": 0.208, "step": 35100 },
{ "epoch": 0.2816, "grad_norm": 5.777594089508057, "learning_rate": 7.214457831325302e-06, "loss": 0.1997, "step": 35200 },
{ "epoch": 0.2824, "grad_norm": 2.708245277404785, "learning_rate": 7.206425702811246e-06, "loss": 0.2111, "step": 35300 },
{ "epoch": 0.2832, "grad_norm": 3.1390602588653564, "learning_rate": 7.1983935742971885e-06, "loss": 0.2094, "step": 35400 },
{ "epoch": 0.284, "grad_norm": 4.028870105743408, "learning_rate": 7.190361445783132e-06, "loss": 0.2246, "step": 35500 },
{ "epoch": 0.2848, "grad_norm": 15.425012588500977, "learning_rate": 7.182329317269076e-06, "loss": 0.1956, "step": 35600 },
{ "epoch": 0.2856, "grad_norm": 5.734979152679443, "learning_rate": 7.17429718875502e-06, "loss": 0.2125, "step": 35700 },
{ "epoch": 0.2864, "grad_norm": 5.9506916999816895, "learning_rate": 7.166265060240964e-06, "loss": 0.205, "step": 35800 },
{ "epoch": 0.2872, "grad_norm": 6.654294013977051, "learning_rate": 7.158232931726908e-06, "loss": 0.2183, "step": 35900 },
{ "epoch": 0.288, "grad_norm": 4.6381754875183105, "learning_rate": 7.150200803212852e-06, "loss": 0.2108, "step": 36000 },
{ "epoch": 0.288, "eval_test1_cer": 0.05035850324893569, "eval_test1_cer_norm": 0.034854851489903235, "eval_test1_loss": 0.2031903713941574, "eval_test1_runtime": 1180.1384, "eval_test1_samples_per_second": 2.118, "eval_test1_steps_per_second": 0.53, "eval_test1_wer": 0.15638939910784572, "eval_test1_wer_norm": 0.09538560448873433, "step": 36000 },
{ "epoch": 0.288, "eval_test2_cer": 0.15337197147870235, "eval_test2_cer_norm": 0.11982878834831112, "eval_test2_loss": 0.36121314764022827, "eval_test2_runtime": 1277.8248, "eval_test2_samples_per_second": 1.956, "eval_test2_steps_per_second": 0.489, "eval_test2_wer": 0.2875944151979858, "eval_test2_wer_norm": 0.22060508824203529, "step": 36000 },
{ "epoch": 0.2888, "grad_norm": 6.482578277587891, "learning_rate": 7.142168674698796e-06, "loss": 0.2319, "step": 36100 },
{ "epoch": 0.2896, "grad_norm": 6.14495849609375, "learning_rate": 7.13413654618474e-06, "loss": 0.2008, "step": 36200 },
{ "epoch": 0.2904, "grad_norm": 7.641204357147217, "learning_rate": 7.126104417670683e-06, "loss": 0.2154, "step": 36300 },
{ "epoch": 0.2912, "grad_norm": 10.709319114685059, "learning_rate": 7.118072289156627e-06, "loss": 0.1996, "step": 36400 },
{ "epoch": 0.292, "grad_norm": 9.637004852294922, "learning_rate": 7.110120481927711e-06, "loss": 0.214, "step": 36500 },
{ "epoch": 0.2928, "grad_norm": 2.7376303672790527, "learning_rate": 7.102088353413655e-06, "loss": 0.2064, "step": 36600 },
{ "epoch": 0.2936, "grad_norm": 5.24441385269165, "learning_rate": 7.094056224899599e-06, "loss": 0.2105, "step": 36700 },
{ "epoch": 0.2944, "grad_norm": 2.739924907684326, "learning_rate": 7.086024096385543e-06, "loss": 0.231, "step": 36800 },
{ "epoch": 0.2952, "grad_norm": 10.428742408752441, "learning_rate": 7.077991967871487e-06, "loss": 0.1984, "step": 36900 },
{ "epoch": 0.296, "grad_norm": 7.877098560333252, "learning_rate": 7.06995983935743e-06, "loss": 0.2054, "step": 37000 },
{ "epoch": 0.2968, "grad_norm": 4.027400493621826, "learning_rate": 7.061927710843374e-06, "loss": 0.2088, "step": 37100 },
{ "epoch": 0.2976, "grad_norm": 2.8548176288604736, "learning_rate": 7.053895582329318e-06, "loss": 0.2126, "step": 37200 },
{ "epoch": 0.2984, "grad_norm": 8.31721019744873, "learning_rate": 7.045863453815262e-06, "loss": 0.2093, "step": 37300 },
{ "epoch": 0.2992, "grad_norm": 10.732782363891602, "learning_rate": 7.0378313253012056e-06, "loss": 0.2148, "step": 37400 },
{ "epoch": 0.3, "grad_norm": 6.584483623504639, "learning_rate": 7.0297991967871495e-06, "loss": 0.215, "step": 37500 },
{ "epoch": 0.3008, "grad_norm": 5.9521684646606445, "learning_rate": 7.021767068273093e-06, "loss": 0.1889, "step": 37600 },
{ "epoch": 0.3016, "grad_norm": 4.7626166343688965, "learning_rate": 7.013734939759037e-06, "loss": 0.2022, "step": 37700 },
{ "epoch": 0.3024, "grad_norm": 4.184934616088867, "learning_rate": 7.005702811244981e-06, "loss": 0.2237, "step": 37800 },
{ "epoch": 0.3032, "grad_norm": 5.2891998291015625, "learning_rate": 6.9976706827309235e-06, "loss": 0.1974, "step": 37900 },
{ "epoch": 0.304, "grad_norm": 12.890066146850586, "learning_rate": 6.989638554216867e-06, "loss": 0.2317, "step": 38000 },
{ "epoch": 0.3048, "grad_norm": 13.514941215515137, "learning_rate": 6.981606425702811e-06, "loss": 0.2175, "step": 38100 },
{ "epoch": 0.3056, "grad_norm": 5.781461238861084, "learning_rate": 6.973574297188755e-06, "loss": 0.1852, "step": 38200 },
{ "epoch": 0.3064, "grad_norm": 8.979798316955566, "learning_rate": 6.965542168674699e-06, "loss": 0.2156, "step": 38300 },
{ "epoch": 0.3072, "grad_norm": 6.392210006713867, "learning_rate": 6.957510040160643e-06, "loss": 0.1815, "step": 38400 },
{ "epoch": 0.308, "grad_norm": 7.205411434173584, "learning_rate": 6.949558232931728e-06, "loss": 0.2074, "step": 38500 },
{ "epoch": 0.3088, "grad_norm": 6.6332621574401855, "learning_rate": 6.9415261044176704e-06, "loss": 0.2037, "step": 38600 },
{ "epoch": 0.3096, "grad_norm": 6.583282470703125, "learning_rate": 6.933493975903614e-06, "loss": 0.2161, "step": 38700 },
{ "epoch": 0.3104, "grad_norm": 5.363083362579346, "learning_rate": 6.925461847389558e-06, "loss": 0.1842, "step": 38800 },
{ "epoch": 0.3112, "grad_norm": 11.732734680175781, "learning_rate": 6.917429718875502e-06, "loss": 0.1955, "step": 38900 },
{ "epoch": 0.312, "grad_norm": 4.4274444580078125, "learning_rate": 6.909397590361446e-06, "loss": 0.2065, "step": 39000 },
{ "epoch": 0.3128, "grad_norm": 8.530516624450684, "learning_rate": 6.90136546184739e-06, "loss": 0.2015, "step": 39100 },
{ "epoch": 0.3136, "grad_norm": 4.273055076599121, "learning_rate": 6.893333333333334e-06, "loss": 0.1997, "step": 39200 },
{ "epoch": 0.3144, "grad_norm": 5.483185291290283, "learning_rate": 6.885301204819278e-06, "loss": 0.1987, "step": 39300 },
{ "epoch": 0.3152, "grad_norm": 8.107812881469727, "learning_rate": 6.877269076305222e-06, "loss": 0.2146, "step": 39400 },
{ "epoch": 0.316, "grad_norm": 11.944705963134766, "learning_rate": 6.869236947791165e-06, "loss": 0.2243, "step": 39500 },
{ "epoch": 0.3168, "grad_norm": 6.820212364196777, "learning_rate": 6.861204819277109e-06, "loss": 0.2106, "step": 39600 },
{ "epoch": 0.3176, "grad_norm": 16.74178123474121, "learning_rate": 6.853172690763053e-06, "loss": 0.208, "step": 39700 },
{ "epoch": 0.3184, "grad_norm": 8.6283597946167, "learning_rate": 6.845140562248997e-06, "loss": 0.191, "step": 39800 },
{ "epoch": 0.3192, "grad_norm": 6.8946661949157715, "learning_rate": 6.8371084337349406e-06, "loss": 0.2055, "step": 39900 },
{ "epoch": 0.32, "grad_norm": 6.649729251861572, "learning_rate": 6.8290763052208845e-06, "loss": 0.2005, "step": 40000 },
{ "epoch": 0.32, "eval_test1_cer": 0.04994771827619688, "eval_test1_cer_norm": 0.03569524815713017, "eval_test1_loss": 0.20380495488643646, "eval_test1_runtime": 1179.5149, "eval_test1_samples_per_second": 2.12, "eval_test1_steps_per_second": 0.53, "eval_test1_wer": 0.16318260007580396, "eval_test1_wer_norm": 0.10292527543177767, "step": 40000 },
{ "epoch": 0.32, "eval_test2_cer": 0.10917142643819763, "eval_test2_cer_norm": 0.08240529129222188, "eval_test2_loss": 0.36028367280960083, "eval_test2_runtime": 1209.014, "eval_test2_samples_per_second": 2.068, "eval_test2_steps_per_second": 0.517, "eval_test2_wer": 0.24101625085831999, "eval_test2_wer_norm": 0.17456451982580792, "step": 40000 },
{ "epoch": 0.3208, "grad_norm": 6.898298740386963, "learning_rate": 6.821044176706828e-06, "loss": 0.1872, "step": 40100 },
{ "epoch": 0.3216, "grad_norm": 5.272704124450684, "learning_rate": 6.813012048192772e-06, "loss": 0.207, "step": 40200 },
{ "epoch": 0.3224, "grad_norm": 3.6596715450286865, "learning_rate": 6.804979919678716e-06, "loss": 0.2143, "step": 40300 },
{ "epoch": 0.3232, "grad_norm": 3.9344301223754883, "learning_rate": 6.7969477911646585e-06, "loss": 0.1844, "step": 40400 },
{ "epoch": 0.324, "grad_norm": 10.56173324584961, "learning_rate": 6.788915662650602e-06, "loss": 0.2096, "step": 40500 },
{ "epoch": 0.3248, "grad_norm": 4.7323713302612305, "learning_rate": 6.780883534136546e-06, "loss": 0.2012, "step": 40600 },
{ "epoch": 0.3256, "grad_norm": 13.328306198120117, "learning_rate": 6.7729317269076315e-06, "loss": 0.1891, "step": 40700 },
{ "epoch": 0.3264, "grad_norm": 6.378972053527832, "learning_rate": 6.764899598393575e-06, "loss": 0.2113, "step": 40800 },
{ "epoch": 0.3272, "grad_norm": 2.7503671646118164, "learning_rate": 6.756867469879519e-06, "loss": 0.2242, "step": 40900 },
{ "epoch": 0.328, "grad_norm": 7.0376296043396, "learning_rate": 6.748835341365463e-06, "loss": 0.1938, "step": 41000 },
{ "epoch": 0.3288, "grad_norm": 7.667542457580566, "learning_rate": 6.740803212851406e-06, "loss": 0.2199, "step": 41100 },
{ "epoch": 0.3296, "grad_norm": 7.959465026855469, "learning_rate": 6.732771084337349e-06, "loss": 0.2046, "step": 41200 },
{ "epoch": 0.3304, "grad_norm": 6.901830196380615, "learning_rate": 6.724738955823293e-06, "loss": 0.2257, "step": 41300 },
{ "epoch": 0.3312, "grad_norm": 3.7971105575561523, "learning_rate": 6.716706827309237e-06, "loss": 0.1972, "step": 41400 },
{ "epoch": 0.332, "grad_norm": 7.321595191955566, "learning_rate": 6.708674698795181e-06, "loss": 0.1896, "step": 41500 },
{ "epoch": 0.3328, "grad_norm": 4.369442462921143, "learning_rate": 6.700642570281125e-06, "loss": 0.2164, "step": 41600 },
{ "epoch": 0.3336, "grad_norm": 6.851342678070068, "learning_rate": 6.692610441767069e-06, "loss": 0.2097, "step": 41700 },
{ "epoch": 0.3344, "grad_norm": 7.6141228675842285, "learning_rate": 6.684578313253013e-06, "loss": 0.2214, "step": 41800 },
{ "epoch": 0.3352, "grad_norm": 2.4399728775024414, "learning_rate": 6.676546184738957e-06, "loss": 0.2117, "step": 41900 },
{ "epoch": 0.336, "grad_norm": 4.702713966369629, "learning_rate": 6.668514056224901e-06, "loss": 0.2122, "step": 42000 },
{ "epoch": 0.3368, "grad_norm": 13.61172866821289, "learning_rate": 6.660481927710844e-06, "loss": 0.2114, "step": 42100 },
{ "epoch": 0.3376, "grad_norm": 8.276487350463867, "learning_rate": 6.652449799196788e-06, "loss": 0.2041, "step": 42200 },
{ "epoch": 0.3384, "grad_norm": 7.3196539878845215, "learning_rate": 6.644417670682732e-06, "loss": 0.2107, "step": 42300 },
{ "epoch": 0.3392, "grad_norm": 6.650379657745361, "learning_rate": 6.6363855421686756e-06, "loss": 0.2114, "step": 42400 },
{ "epoch": 0.34, "grad_norm": 9.550190925598145, "learning_rate": 6.628353413654619e-06, "loss": 0.1889, "step": 42500 },
{ "epoch": 0.3408, "grad_norm": 4.141082286834717, "learning_rate": 6.6203212851405626e-06, "loss": 0.2006, "step": 42600 },
{ "epoch": 0.3416, "grad_norm": 8.406717300415039, "learning_rate": 6.6122891566265065e-06, "loss": 0.2016, "step": 42700 },
{ "epoch": 0.3424, "grad_norm": 11.38437557220459, "learning_rate": 6.604337349397591e-06, "loss": 0.2128, "step": 42800 },
{ "epoch": 0.3432, "grad_norm": 4.217543125152588, "learning_rate": 6.596305220883535e-06, "loss": 0.2126, "step": 42900 },
{ "epoch": 0.344, "grad_norm": 7.4914703369140625, "learning_rate": 6.588273092369479e-06, "loss": 0.1981, "step": 43000 },
{ "epoch": 0.3448, "grad_norm": 9.509758949279785, "learning_rate": 6.580240963855422e-06, "loss": 0.2041, "step": 43100 },
{ "epoch": 0.3456, "grad_norm": 6.551538467407227, "learning_rate": 6.572208835341366e-06, "loss": 0.2226, "step": 43200 },
{ "epoch": 0.3464, "grad_norm": 6.811914443969727, "learning_rate": 6.5641767068273095e-06, "loss": 0.2015, "step": 43300 },
{ "epoch": 0.3472, "grad_norm": 4.834348201751709, "learning_rate": 6.5561445783132535e-06, "loss": 0.2036, "step": 43400 },
{ "epoch": 0.348, "grad_norm": 10.421252250671387, "learning_rate": 6.548112449799197e-06, "loss": 0.2204, "step": 43500 },
{ "epoch": 0.3488, "grad_norm": 7.1061882972717285, "learning_rate": 6.540080321285141e-06, "loss": 0.1971, "step": 43600 },
{ "epoch": 0.3496, "grad_norm": 3.9507927894592285, "learning_rate": 6.532048192771084e-06, "loss": 0.217, "step": 43700 },
{ "epoch": 0.3504, "grad_norm": 6.255137920379639, "learning_rate": 6.524016064257028e-06, "loss": 0.199, "step": 43800 },
{ "epoch": 0.3512, "grad_norm": 6.308269500732422, "learning_rate": 6.515983935742972e-06, "loss": 0.2024, "step": 43900 },
{ "epoch": 0.352, "grad_norm": 2.7685184478759766, "learning_rate": 6.507951807228916e-06, "loss": 0.2076, "step": 44000 },
{ "epoch": 0.352, "eval_test1_cer": 0.05585275225931735, "eval_test1_cer_norm": 0.04021898336014599, "eval_test1_loss": 0.19787286221981049, "eval_test1_runtime": 1201.2365, "eval_test1_samples_per_second": 2.081, "eval_test1_steps_per_second": 0.52, "eval_test1_wer": 0.17079215137467565, "eval_test1_wer_norm": 0.10894532277389754, "step": 44000 },
{ "epoch": 0.352, "eval_test2_cer": 0.11820565946167917, "eval_test2_cer_norm": 0.0912660753021382, "eval_test2_loss": 0.3588273823261261, "eval_test2_runtime": 1231.6351, "eval_test2_samples_per_second": 2.03, "eval_test2_steps_per_second": 0.507, "eval_test2_wer": 0.252431906614786, "eval_test2_wer_norm": 0.18519367407746964, "step": 44000 },
{ "epoch": 0.3528, "grad_norm": 10.823402404785156, "learning_rate": 6.49991967871486e-06, "loss": 0.1976, "step": 44100 },
{ "epoch": 0.3536, "grad_norm": 3.3830673694610596, "learning_rate": 6.491887550200804e-06, "loss": 0.1952, "step": 44200 },
{ "epoch": 0.3544, "grad_norm": 8.530442237854004, "learning_rate": 6.483855421686748e-06, "loss": 0.2122, "step": 44300 },
{ "epoch": 0.3552, "grad_norm": 4.178443908691406, "learning_rate": 6.475823293172692e-06, "loss": 0.2014, "step": 44400 },
{ "epoch": 0.356, "grad_norm": 13.146326065063477, "learning_rate": 6.467791164658636e-06, "loss": 0.179, "step": 44500 },
{ "epoch": 0.3568, "grad_norm": 9.41238021850586, "learning_rate": 6.459759036144578e-06, "loss": 0.2135, "step": 44600 },
{ "epoch": 0.3576, "grad_norm": 4.255426406860352, "learning_rate": 6.451726907630522e-06, "loss": 0.1957, "step": 44700 },
{ "epoch": 0.3584, "grad_norm": 6.541229248046875, "learning_rate": 6.443775100401607e-06, "loss": 0.2, "step": 44800 },
{ "epoch": 0.3592, "grad_norm": 4.5669636726379395, "learning_rate": 6.435742971887551e-06, "loss": 0.187, "step": 44900 },
{ "epoch": 0.36, "grad_norm": 6.742759704589844, "learning_rate": 6.427710843373495e-06, "loss": 0.1961, "step": 45000 },
{ "epoch": 0.3608, "grad_norm": 6.676318168640137, "learning_rate": 6.419678714859439e-06, "loss": 0.2023, "step": 45100 },
{ "epoch": 0.3616, "grad_norm": 5.580842018127441, "learning_rate": 6.411646586345383e-06, "loss": 0.2257, "step": 45200 },
{ "epoch": 0.3624, "grad_norm": 12.327486038208008, "learning_rate": 6.403614457831325e-06, "loss": 0.1922, "step": 45300 },
{ "epoch": 0.3632, "grad_norm": 3.540501117706299, "learning_rate": 6.395582329317269e-06, "loss": 0.1914, "step": 45400 },
{ "epoch": 0.364, "grad_norm": 5.7018351554870605, "learning_rate": 6.387550200803213e-06, "loss": 0.1921, "step": 45500 },
{ "epoch": 0.3648, "grad_norm": 5.11850118637085, "learning_rate": 6.379518072289157e-06, "loss": 0.1847, "step": 45600 },
{ "epoch": 0.3656, "grad_norm": 9.415154457092285, "learning_rate": 6.371485943775101e-06, "loss": 0.1992, "step": 45700 },
{ "epoch": 0.3664, "grad_norm": 5.810830116271973, "learning_rate": 6.3634538152610445e-06, "loss": 0.2, "step": 45800 },
{ "epoch": 0.3672, "grad_norm": 6.580953598022461, "learning_rate": 6.3554216867469885e-06, "loss": 0.1853, "step": 45900 },
{ "epoch": 0.368, "grad_norm": 5.693816184997559, "learning_rate": 6.347389558232932e-06, "loss": 0.1935, "step": 46000 },
{ "epoch": 0.3688, "grad_norm": 3.0731186866760254, "learning_rate": 6.339357429718876e-06, "loss": 0.1779, "step": 46100 },
{ "epoch": 0.3696, "grad_norm": 7.017910003662109, "learning_rate": 6.331325301204819e-06, "loss": 0.189, "step": 46200 },
{ "epoch": 0.3704, "grad_norm": 13.572850227355957, "learning_rate": 6.323293172690763e-06, "loss": 0.1949, "step": 46300 },
{ "epoch": 0.3712, "grad_norm": 4.851452827453613, "learning_rate": 6.315261044176707e-06, "loss": 0.1865, "step": 46400 },
{ "epoch": 0.372, "grad_norm": 3.416046619415283, "learning_rate": 6.307228915662651e-06, "loss": 0.2003, "step": 46500 },
{ "epoch": 0.3728, "grad_norm": 7.885288715362549, "learning_rate": 6.299196787148595e-06, "loss": 0.2126, "step": 46600 },
{ "epoch": 0.3736, "grad_norm": 3.072671890258789, "learning_rate": 6.291164658634539e-06, "loss": 0.2006, "step": 46700 },
{ "epoch": 0.3744, "grad_norm": 3.8454980850219727, "learning_rate": 6.283132530120483e-06, "loss": 0.1911, "step": 46800 },
{ "epoch": 0.3752, "grad_norm": 1.3022770881652832, "learning_rate": 6.275100401606427e-06, "loss": 0.1904, "step": 46900 },
{ "epoch": 0.376, "grad_norm": 9.75496768951416, "learning_rate": 6.267068273092371e-06, "loss": 0.1902, "step": 47000 },
{ "epoch": 0.3768, "grad_norm": 5.822065353393555, "learning_rate": 6.259116465863454e-06, "loss": 0.2156, "step": 47100 },
{ "epoch": 0.3776, "grad_norm": 11.050053596496582, "learning_rate": 6.251084337349398e-06, "loss": 0.2068, "step": 47200 },
{ "epoch": 0.3784, "grad_norm": 11.641470909118652, "learning_rate": 6.243052208835342e-06, "loss": 0.1784, "step": 47300 },
{ "epoch": 0.3792, "grad_norm": 6.159931182861328, "learning_rate": 6.235020080321286e-06, "loss": 0.1869, "step": 47400 },
{ "epoch": 0.38, "grad_norm": 5.114069938659668, "learning_rate": 6.22698795180723e-06, "loss": 0.1897, "step": 47500 },
{ "epoch": 0.3808, "grad_norm": 4.50574254989624, "learning_rate": 6.218955823293174e-06, "loss": 0.1936, "step": 47600 },
{ "epoch": 0.3816, "grad_norm": 8.144811630249023, "learning_rate": 6.210923694779118e-06, "loss": 0.2113, "step": 47700 },
{ "epoch": 0.3824, "grad_norm": 4.306349754333496, "learning_rate": 6.202891566265062e-06, "loss": 0.1951, "step": 47800 },
{ "epoch": 0.3832, "grad_norm": 6.88608455657959, "learning_rate": 6.194859437751004e-06, "loss": 0.2107, "step": 47900 },
{ "epoch": 0.384, "grad_norm": 10.85550594329834, "learning_rate": 6.186827309236948e-06, "loss": 0.2117, "step": 48000 },
{ "epoch": 0.384, "eval_test1_cer": 0.05253379640002988, "eval_test1_cer_norm": 0.035930559223953704, "eval_test1_loss": 0.1944696605205536, "eval_test1_runtime": 2131.2533, "eval_test1_samples_per_second": 1.173, "eval_test1_steps_per_second": 0.293, "eval_test1_wer": 0.15726406017668154, "eval_test1_wer_norm": 0.09462579268827259, "step": 48000 },
{ "epoch": 0.384, "eval_test2_cer": 0.11918094598125957, "eval_test2_cer_norm": 0.08796870158041524, "eval_test2_loss": 0.3547360897064209, "eval_test2_runtime": 2218.4059, "eval_test2_samples_per_second": 1.127, "eval_test2_steps_per_second": 0.282, "eval_test2_wer": 0.2637045090409705, "eval_test2_wer_norm": 0.1967396286958515, "step": 48000 },
{ "epoch": 0.3848, "grad_norm": 8.570365905761719, "learning_rate": 6.178795180722892e-06, "loss": 0.2176, "step": 48100 },
{ "epoch": 0.3856, "grad_norm": 6.4876604080200195, "learning_rate": 6.170763052208836e-06, "loss": 0.2062, "step": 48200 },
{ "epoch": 0.3864, "grad_norm": 4.121545314788818, "learning_rate": 6.1627309236947796e-06, "loss": 0.1855, "step": 48300 },
{ "epoch": 0.3872, "grad_norm": 8.148366928100586, "learning_rate": 6.1546987951807235e-06, "loss": 0.1953, "step": 48400 },
{ "epoch": 0.388, "grad_norm": 5.750129222869873, "learning_rate": 6.146666666666667e-06, "loss": 0.1947, "step": 48500 },
{ "epoch": 0.3888, "grad_norm": 7.019141674041748, "learning_rate": 6.138634538152611e-06, "loss": 0.1868, "step": 48600 },
{ "epoch": 0.3896, "grad_norm": 4.4510273933410645, "learning_rate": 6.130602409638555e-06, "loss": 0.2102, "step": 48700 },
{ "epoch": 0.3904, "grad_norm": 7.094069480895996, "learning_rate": 6.122570281124498e-06, "loss": 0.1899, "step": 48800 },
{ "epoch": 0.3912, "grad_norm": 6.22620153427124, "learning_rate": 6.114538152610442e-06, "loss": 0.2206, "step": 48900 },
{ "epoch": 0.392, "grad_norm": 7.25572395324707, "learning_rate": 6.106506024096386e-06, "loss": 0.1914, "step": 49000 },
{ "epoch": 0.3928, "grad_norm": 5.544373512268066, "learning_rate": 6.0985542168674705e-06, "loss": 0.1877, "step": 49100 },
{ "epoch": 1.000792, "grad_norm": 5.080130577087402, "learning_rate": 6.090522088353414e-06, "loss": 0.1799, "step": 49200 },
{ "epoch": 1.001592, "grad_norm": 2.341817617416382, "learning_rate": 6.082489959839358e-06, "loss": 0.1597, "step": 49300 },
{ "epoch": 1.002392, "grad_norm": 3.7877089977264404, "learning_rate": 6.074457831325302e-06, "loss": 0.1463, "step": 49400 },
{ "epoch": 1.003192, "grad_norm": 2.4026944637298584, "learning_rate": 6.066425702811245e-06, "loss": 0.1515, "step": 49500 },
{ "epoch": 1.003992, "grad_norm": 4.575283527374268, "learning_rate": 6.058393574297189e-06, "loss": 0.1579, "step": 49600 },
{ "epoch": 1.004792, "grad_norm": 5.175034999847412, "learning_rate": 6.050361445783133e-06, "loss": 0.1313, "step": 49700 },
{ "epoch": 1.005592, "grad_norm": 4.901139259338379, "learning_rate": 6.042329317269077e-06, "loss": 0.1501, "step": 49800 },
{ "epoch": 1.006392, "grad_norm": 3.304086685180664, "learning_rate": 6.034297188755021e-06, "loss": 0.1295, "step": 49900 },
{ "epoch": 1.007192, "grad_norm": 3.6357131004333496, "learning_rate": 6.026265060240965e-06, "loss": 0.127, "step": 50000 },
{ "epoch": 1.007992, "grad_norm": 23.502784729003906, "learning_rate": 6.018232931726908e-06, "loss": 0.1354, "step": 50100 },
{ "epoch": 1.008792, "grad_norm": 2.340665817260742, "learning_rate": 6.010200803212852e-06, "loss": 0.1405, "step": 50200 },
{ "epoch": 1.009592, "grad_norm": 4.528487205505371, "learning_rate": 6.002168674698796e-06, "loss": 0.1299, "step": 50300 },
{ "epoch": 1.010392, "grad_norm": 3.589071035385132, "learning_rate": 5.994136546184739e-06, "loss": 0.1478, "step": 50400 },
{ "epoch": 1.011192, "grad_norm": 11.359587669372559, "learning_rate": 5.986104417670683e-06, "loss": 0.133, "step": 50500 },
{ "epoch": 1.011992, "grad_norm": 2.8059144020080566, "learning_rate": 5.978072289156627e-06, "loss": 0.1366, "step": 50600 },
{ "epoch": 1.012792, "grad_norm": 8.648773193359375, "learning_rate": 5.970040160642571e-06, "loss": 0.1499, "step": 50700 },
{ "epoch": 1.013592, "grad_norm": 7.918379306793213, "learning_rate": 5.9620080321285146e-06, "loss": 0.1493, "step": 50800 },
{ "epoch": 1.014392, "grad_norm": 5.396472454071045, "learning_rate": 5.9539759036144585e-06, "loss": 0.1433, "step": 50900 },
{ "epoch": 1.015192, "grad_norm": 6.0125412940979, "learning_rate": 5.945943775100402e-06, "loss": 0.1216, "step": 51000 },
{ "epoch": 1.015992, "grad_norm": 5.78593111038208, "learning_rate": 5.937911646586346e-06, "loss": 0.1524, "step": 51100 },
{ "epoch": 1.016792, "grad_norm": 2.646674156188965, "learning_rate": 5.92995983935743e-06, "loss": 0.1303, "step": 51200 },
{ "epoch": 1.017592, "grad_norm": 4.267711639404297, "learning_rate": 5.921927710843374e-06, "loss": 0.1298, "step": 51300 },
{ "epoch": 1.018392, "grad_norm": 6.027831554412842, "learning_rate": 5.913895582329318e-06, "loss": 0.1361, "step": 51400 },
{ "epoch": 1.019192, "grad_norm": 7.927816390991211, "learning_rate": 5.9058634538152615e-06, "loss": 0.1341, "step": 51500 },
{ "epoch": 1.019992, "grad_norm": 9.55642318725586, "learning_rate": 5.8978313253012055e-06, "loss": 0.1351, "step": 51600 },
{ "epoch": 1.020792, "grad_norm": 5.5017409324646, "learning_rate": 5.889799196787149e-06, "loss": 0.1325, "step": 51700 },
{ "epoch": 1.021592, "grad_norm": 5.213791370391846, "learning_rate": 5.881767068273093e-06, "loss": 0.1418, "step": 51800 },
{ "epoch": 1.022392, "grad_norm": 2.497223138809204, "learning_rate": 5.873734939759037e-06, "loss": 0.1407, "step": 51900 },
{ "epoch": 1.023192, "grad_norm": 3.251509428024292, "learning_rate": 5.8657028112449794e-06, "loss": 0.133, "step": 52000 },
{ "epoch": 1.023192, "eval_test1_cer": 0.04808518186571066, "eval_test1_cer_norm": 0.032487334021658223, "eval_test1_loss": 0.19697345793247223, "eval_test1_runtime": 1190.0899, "eval_test1_samples_per_second": 2.101, "eval_test1_steps_per_second": 0.525, "eval_test1_wer": 0.1493629551881979, "eval_test1_wer_norm": 0.08699845115286829, "step": 52000 },
{ "epoch": 1.023192, "eval_test2_cer": 0.11403385970806734, "eval_test2_cer_norm": 0.08345115432290053, "eval_test2_loss": 0.36047160625457764, "eval_test2_runtime": 1226.7275, "eval_test2_samples_per_second": 2.038, "eval_test2_steps_per_second": 0.509, "eval_test2_wer": 0.24490730144197756, "eval_test2_wer_norm": 0.17745817098326838, "step": 52000 },
{ "epoch": 1.023992, "grad_norm": 7.025960922241211, "learning_rate": 5.857670682730923e-06, "loss": 0.1494, "step": 52100 },
{ "epoch": 1.024792, "grad_norm": 4.487267971038818, "learning_rate": 5.849638554216867e-06, "loss": 0.1403, "step": 52200 },
{ "epoch": 1.025592, "grad_norm": 4.388223648071289, "learning_rate": 5.841606425702811e-06, "loss": 0.1172, "step": 52300 },
{ "epoch": 1.026392, "grad_norm": 6.941164493560791, "learning_rate": 5.833574297188755e-06, "loss": 0.1218, "step": 52400 },
{ "epoch": 1.027192, "grad_norm": 6.419257640838623, "learning_rate": 5.825542168674699e-06, "loss": 0.1317, "step": 52500 },
{ "epoch": 1.027992, "grad_norm": 6.4062323570251465, "learning_rate": 5.817510040160643e-06, "loss": 0.1345, "step": 52600 },
{ "epoch": 1.028792, "grad_norm": 8.374984741210938, "learning_rate": 5.809477911646587e-06, "loss": 0.126, "step": 52700 },
{ "epoch": 1.029592, "grad_norm": 9.091133117675781, "learning_rate": 5.801445783132531e-06, "loss": 0.1247, "step": 52800 },
{ "epoch": 1.030392, "grad_norm": 6.033024311065674, "learning_rate": 5.793413654618474e-06, "loss": 0.1273, "step": 52900 },
{ "epoch": 1.031192, "grad_norm": 4.625008583068848, "learning_rate": 5.785381526104418e-06, "loss": 0.1448, "step": 53000 },
{ "epoch": 1.031992, "grad_norm": 3.7176895141601562, "learning_rate": 5.777349397590362e-06, "loss": 0.1105, "step": 53100 },
{ "epoch": 1.032792, "grad_norm": 4.577524185180664, "learning_rate": 5.769317269076306e-06, "loss": 0.1568, "step": 53200 },
{ "epoch": 1.033592, "grad_norm": 3.0853545665740967, "learning_rate": 5.7612851405622496e-06, "loss": 0.1369, "step": 53300 },
{ "epoch": 1.034392, "grad_norm": 5.356490135192871, "learning_rate": 5.7532530120481935e-06, "loss": 0.1373, "step": 53400 },
{ "epoch": 1.035192, "grad_norm": 7.52249813079834, "learning_rate": 5.745220883534137e-06, "loss": 0.1256, "step": 53500 },
{ "epoch": 1.035992, "grad_norm": 6.246498107910156, "learning_rate": 5.737188755020081e-06, "loss": 0.1473, "step": 53600 },
{ "epoch": 1.036792, "grad_norm": 6.8541693687438965, "learning_rate": 5.729236947791165e-06, "loss": 0.1281, "step": 53700 },
{ "epoch": 1.037592, "grad_norm": 2.0510551929473877, "learning_rate": 5.721204819277109e-06, "loss": 0.1243, "step": 53800 },
{ "epoch": 1.038392, "grad_norm": 6.91987419128418, "learning_rate": 5.713172690763053e-06, "loss": 0.1199, "step": 53900 },
{ "epoch": 1.039192, "grad_norm": 3.939608097076416, "learning_rate": 5.7051405622489965e-06, "loss": 0.1334, "step": 54000 },
{ "epoch": 1.039992, "grad_norm": 8.798985481262207, "learning_rate": 5.6971084337349405e-06, "loss": 0.1119, "step": 54100 },
{ "epoch": 1.040792, "grad_norm": 6.9459123611450195, "learning_rate": 5.689076305220884e-06, "loss": 0.1324, "step": 54200 },
{ "epoch": 1.041592, "grad_norm": 7.139726161956787, "learning_rate": 5.681044176706828e-06, "loss": 0.1342, "step": 54300 },
{ "epoch": 1.042392, "grad_norm": 1.988398790359497, "learning_rate": 5.673012048192772e-06, "loss": 0.123, "step": 54400 },
{ "epoch": 1.043192, "grad_norm": 1.8150029182434082, "learning_rate": 5.664979919678716e-06, "loss": 0.1376, "step": 54500 },
{ "epoch": 1.043992, "grad_norm": 8.445022583007812, "learning_rate": 5.656947791164658e-06, "loss": 0.1393, "step": 54600 },
{ "epoch": 1.044792, "grad_norm": 6.788310527801514, "learning_rate": 5.648915662650602e-06, "loss": 0.1412, "step": 54700 },
{ "epoch": 1.045592, "grad_norm": 6.29990816116333, "learning_rate": 5.640883534136546e-06, "loss": 0.1312, "step": 54800 },
{ "epoch": 1.046392, "grad_norm": 1.2310799360275269, "learning_rate": 5.63285140562249e-06, "loss": 0.1331, "step": 54900 },
{ "epoch": 1.047192, "grad_norm": 5.38347053527832, "learning_rate": 5.624819277108434e-06, "loss": 0.1295, "step": 55000 },
{ "epoch": 1.047992, "grad_norm": 3.4652884006500244, "learning_rate": 5.616787148594378e-06, "loss": 0.1215, "step": 55100 },
{ "epoch": 1.048792, "grad_norm": 7.579988479614258, "learning_rate": 5.608755020080322e-06, "loss": 0.1388, "step": 55200 },
{ "epoch": 1.049592, "grad_norm": 0.8664081692695618, "learning_rate": 5.600722891566266e-06, "loss": 0.1199, "step": 55300 },
{ "epoch": 1.050392, "grad_norm": 6.584039211273193, "learning_rate": 5.59269076305221e-06, "loss": 0.123, "step": 55400 },
{ "epoch": 1.051192, "grad_norm": 4.955043792724609, "learning_rate": 5.584658634538153e-06, "loss": 0.1223, "step": 55500 },
{ "epoch": 1.051992, "grad_norm": 2.857651710510254, "learning_rate": 5.576626506024097e-06, "loss": 0.1334, "step": 55600 },
{ "epoch": 1.052792, "grad_norm": 6.896506309509277, "learning_rate": 5.568594377510041e-06, "loss": 0.1377, "step": 55700 },
{ "epoch": 1.053592, "grad_norm": 7.342093467712402, "learning_rate": 5.5605622489959846e-06, "loss": 0.1272, "step": 55800 },
{ "epoch": 1.054392, "grad_norm": 4.082319259643555, "learning_rate": 5.552610441767069e-06, "loss": 0.1355, "step": 55900 },
{ "epoch": 1.055192, "grad_norm": 5.364993572235107, "learning_rate": 5.544578313253013e-06, "loss": 0.1305, "step": 56000 },
{ "epoch": 1.055192, "eval_test1_cer": 0.05301460153857644, "eval_test1_cer_norm": 0.03525824189017216, "eval_test1_loss": 0.19662749767303467, "eval_test1_runtime": 1188.1388, "eval_test1_samples_per_second": 2.104, "eval_test1_steps_per_second": 0.526, "eval_test1_wer": 0.15286159946354122, "eval_test1_wer_norm": 0.09231713375610041, "step": 56000 },
{ "epoch": 1.055192, "eval_test2_cer": 0.1047943032067794, "eval_test2_cer_norm": 0.08095754570808801, "eval_test2_loss": 0.36192458868026733, "eval_test2_runtime": 1219.4681, "eval_test2_samples_per_second": 2.05, "eval_test2_steps_per_second": 0.513, "eval_test2_wer": 0.22871366445410848, "eval_test2_wer_norm": 0.1619012147604859, "step": 56000 },
{ "epoch": 1.055992, "grad_norm": 5.108110427856445, "learning_rate": 5.536546184738957e-06, "loss": 0.1304, "step": 56100 },
{ "epoch": 1.056792, "grad_norm": 1.9956285953521729, "learning_rate": 5.5285140562249e-06, "loss": 0.1325, "step": 56200 },
{ "epoch": 1.057592, "grad_norm": 7.1683220863342285, "learning_rate": 5.520481927710844e-06, "loss": 0.1315, "step": 56300 },
{ "epoch": 1.058392, "grad_norm": 6.5139546394348145, "learning_rate": 5.512449799196788e-06, "loss": 0.1287, "step": 56400 },
{ "epoch": 1.059192, "grad_norm": 6.996273040771484, "learning_rate": 5.5044176706827315e-06, "loss": 0.1273, "step": 56500 },
{ "epoch": 1.059992, "grad_norm": 1.6335968971252441, "learning_rate": 5.4963855421686755e-06, "loss": 0.1157, "step": 56600 },
{ "epoch": 1.060792, "grad_norm": 4.1844892501831055, "learning_rate": 5.488353413654619e-06, "loss": 0.1316, "step": 56700 },
{ "epoch": 1.061592, "grad_norm": 8.634051322937012, "learning_rate": 5.480321285140563e-06, "loss": 0.1083, "step": 56800 },
{ "epoch": 1.062392, "grad_norm": 8.461755752563477, "learning_rate": 5.472289156626507e-06, "loss": 0.1307, "step": 56900 },
{ "epoch": 1.063192, "grad_norm": 5.679012298583984, "learning_rate": 5.464257028112451e-06, "loss": 0.133, "step": 57000 },
{ "epoch": 1.063992, "grad_norm": 5.279029846191406, "learning_rate": 5.456224899598393e-06, "loss": 0.1203, "step": 57100 },
{ "epoch": 1.064792, "grad_norm": 6.086945533752441, "learning_rate": 5.448192771084337e-06, "loss": 0.1371, "step": 57200 },
{ "epoch": 1.065592, "grad_norm": 4.381275177001953, "learning_rate": 5.440160642570281e-06, "loss": 0.134, "step": 57300 },
{ "epoch": 1.066392, "grad_norm": 3.3568952083587646, "learning_rate": 5.432128514056225e-06, "loss": 0.1294, "step": 57400 },
{ "epoch": 1.067192, "grad_norm": 5.998335838317871, "learning_rate": 5.424096385542169e-06, "loss": 0.1074, "step": 57500 },
{ "epoch": 1.067992, "grad_norm": 5.995476245880127, "learning_rate": 5.416064257028113e-06, "loss": 0.1129, "step": 57600 },
{ "epoch": 1.068792, "grad_norm": 7.281689167022705, "learning_rate": 5.408112449799197e-06, "loss": 0.1342, "step": 57700 },
{ "epoch": 1.069592, "grad_norm": 3.4488370418548584, "learning_rate": 5.40008032128514e-06, "loss": 0.1126, "step": 57800 },
{ "epoch": 1.070392, "grad_norm": 6.784258842468262, "learning_rate": 5.392048192771084e-06, "loss": 0.129, "step": 57900 },
{ "epoch": 1.071192, "grad_norm": 3.7626733779907227, "learning_rate": 5.384016064257028e-06, "loss": 0.1323, "step": 58000 },
{ "epoch": 1.071992, "grad_norm": 6.320082187652588, "learning_rate": 5.375983935742972e-06, "loss": 0.1275, "step": 58100 },
{ "epoch": 1.072792, "grad_norm": 10.221648216247559, "learning_rate": 5.367951807228916e-06, "loss": 0.1229, "step": 58200 },
{ "epoch": 1.073592, "grad_norm": 3.2260794639587402, "learning_rate": 5.35991967871486e-06, "loss": 0.1237, "step": 58300 },
{ "epoch": 1.074392, "grad_norm": 7.026370525360107, "learning_rate": 5.351887550200804e-06, "loss": 0.1351, "step": 58400 },
{ "epoch": 1.075192, "grad_norm": 3.7613117694854736, "learning_rate": 5.343855421686748e-06, "loss": 0.1246, "step": 58500 },
{ "epoch": 1.075992, "grad_norm": 19.284780502319336, "learning_rate": 5.335823293172692e-06, "loss": 0.1211, "step": 58600 },
{ "epoch": 1.076792, "grad_norm": 2.5568318367004395, "learning_rate": 5.327791164658635e-06, "loss": 0.1386, "step": 58700 },
{ "epoch": 1.077592, "grad_norm": 8.899717330932617, "learning_rate": 5.319759036144579e-06, "loss": 0.1297, "step": 58800 },
{ "epoch": 1.078392, "grad_norm": 7.388535022735596, "learning_rate": 5.311726907630523e-06, "loss": 0.1218, "step": 58900 },
{ "epoch": 1.079192, "grad_norm": 7.612710952758789, "learning_rate": 5.3036947791164666e-06, "loss": 0.1342, "step": 59000 },
{ "epoch": 1.079992, "grad_norm": 6.390445232391357, "learning_rate": 5.29566265060241e-06, "loss": 0.138, "step": 59100 },
{ "epoch": 1.080792, "grad_norm": 2.2183034420013428, "learning_rate": 5.2876305220883535e-06, "loss": 0.1212, "step": 59200 },
{ "epoch": 1.081592, "grad_norm": 4.985264778137207, "learning_rate": 5.2795983935742975e-06, "loss": 0.1187, "step": 59300 },
{ "epoch": 1.082392, "grad_norm": 10.64930534362793, "learning_rate": 5.271566265060241e-06, "loss": 0.1109, "step": 59400 },
{ "epoch": 1.083192, "grad_norm": 6.063853740692139, "learning_rate": 5.263534136546185e-06, "loss": 0.1136, "step": 59500 },
{ "epoch": 1.083992, "grad_norm": 6.947314739227295, "learning_rate": 5.255502008032128e-06, "loss": 0.1289, "step": 59600 },
{ "epoch": 1.084792, "grad_norm": 2.270883560180664, "learning_rate": 5.247469879518072e-06, "loss": 0.1298, "step": 59700 },
{ "epoch": 1.0855920000000001, "grad_norm": 0.7966949939727783, "learning_rate": 5.239437751004016e-06, "loss": 0.1272, "step": 59800 },
{ "epoch": 1.086392, "grad_norm": 2.719560384750366, "learning_rate": 5.23140562248996e-06, "loss": 0.0987, "step": 59900 },
{ "epoch": 1.087192, "grad_norm": 6.565618515014648, "learning_rate": 5.223373493975904e-06, "loss": 0.1165, "step": 60000 },
{ "epoch": 1.087192, "eval_test1_cer": 0.05625886922100232, "eval_test1_cer_norm": 0.0387686988258458, "eval_test1_loss": 0.199602872133255, "eval_test1_runtime": 1190.6217, "eval_test1_samples_per_second": 2.1, "eval_test1_steps_per_second": 0.525, "eval_test1_wer": 0.1555438934079711, "eval_test1_wer_norm": 0.09591162804290014, "step": 60000 },
{ "epoch": 1.087192, "eval_test2_cer": 0.12617127711203197, "eval_test2_cer_norm": 0.09629202819956616, "eval_test2_loss": 0.36100009083747864, "eval_test2_runtime": 1239.0925, "eval_test2_samples_per_second": 2.018, "eval_test2_steps_per_second": 0.504, "eval_test2_wer": 0.25226024261844815, "eval_test2_wer_norm": 0.18728512491404997, "step": 60000 },
{ "epoch": 1.087992, "grad_norm": 6.482507228851318, "learning_rate": 5.215341365461848e-06, "loss": 0.1414, "step": 60100 },
{ "epoch": 1.088792, "grad_norm": 6.777385234832764, "learning_rate": 5.207309236947792e-06, "loss": 0.1258, "step": 60200 },
{ "epoch": 1.0895920000000001, "grad_norm": 6.040877819061279, "learning_rate": 5.199277108433736e-06, "loss": 0.1167, "step": 60300 },
{ "epoch": 1.090392, "grad_norm": 4.490113258361816, "learning_rate": 5.19124497991968e-06, "loss": 0.113, "step": 60400 },
{ "epoch": 1.091192, "grad_norm": 7.03712797164917, "learning_rate": 5.183212851405624e-06, "loss": 0.1084, "step": 60500 },
{ "epoch": 1.091992, "grad_norm": 2.478569984436035, "learning_rate": 5.175180722891566e-06, "loss": 0.1162, "step": 60600 },
{ "epoch": 1.092792, "grad_norm": 2.2656235694885254, "learning_rate": 5.16714859437751e-06, "loss": 0.1138, "step": 60700 },
{ "epoch": 1.093592, "grad_norm": 6.424910545349121, "learning_rate": 5.159116465863454e-06, "loss": 0.1212, "step": 60800 },
{ "epoch": 1.094392, "grad_norm": 4.551820278167725, "learning_rate": 5.151084337349398e-06, "loss": 0.1148, "step": 60900 },
{ "epoch": 1.095192, "grad_norm": 4.301316738128662, "learning_rate": 5.1430522088353416e-06, "loss": 0.1316, "step": 61000 },
{ "epoch": 1.095992, "grad_norm": 5.7936530113220215, "learning_rate": 5.1350200803212855e-06, "loss": 0.1036, "step": 61100 },
{ "epoch": 1.096792, "grad_norm": 7.100841045379639, "learning_rate": 5.126987951807229e-06, "loss": 0.1138, "step": 61200 },
{ "epoch": 1.097592, "grad_norm": 2.908416986465454, "learning_rate": 5.118955823293173e-06, "loss": 0.1315, "step": 61300 },
{ "epoch": 1.098392, "grad_norm": 3.4737966060638428, "learning_rate": 5.110923694779117e-06, "loss": 0.1333, "step": 61400 },
{ "epoch": 1.099192, "grad_norm": 4.395986080169678, "learning_rate": 5.10289156626506e-06, "loss": 0.1168, "step": 61500 },
{ "epoch": 1.099992, "grad_norm": 5.61540412902832, "learning_rate": 5.094859437751004e-06, "loss": 0.1006, "step": 61600 },
{ "epoch": 1.100792, "grad_norm": 7.517305850982666, "learning_rate": 5.0869076305220885e-06, "loss": 0.1156, "step": 61700 },
{ "epoch": 1.101592, "grad_norm": 3.4721179008483887, "learning_rate": 5.0788755020080325e-06, "loss": 0.1029, "step": 61800 },
{ "epoch": 1.102392, "grad_norm": 8.264998435974121, "learning_rate": 5.070843373493976e-06, "loss": 0.1173, "step": 61900 },
{ "epoch": 1.103192, "grad_norm": 5.457461833953857, "learning_rate": 5.06281124497992e-06, "loss": 0.1189, "step": 62000 },
{ "epoch": 1.103992, "grad_norm": 5.589449882507324, "learning_rate": 5.054779116465864e-06, "loss": 0.1162, "step": 62100 },
{ "epoch": 1.104792, "grad_norm": 6.30127477645874, "learning_rate": 5.046746987951807e-06, "loss": 0.1196, "step": 62200 },
{ "epoch": 1.105592, "grad_norm": 3.9898123741149902, "learning_rate": 5.038714859437751e-06, "loss": 0.1033, "step": 62300 },
{ "epoch": 1.106392, "grad_norm": 4.696240425109863, "learning_rate": 5.030682730923695e-06, "loss": 0.1028, "step": 62400 },
{ "epoch": 1.107192, "grad_norm": 5.9711384773254395, "learning_rate": 5.022650602409639e-06, "loss": 0.1151, "step": 62500 },
{ "epoch": 1.107992, "grad_norm": 4.918034076690674, "learning_rate": 5.014618473895583e-06, "loss": 0.1182, "step": 62600 },
{ "epoch": 1.108792, "grad_norm": 2.1706135272979736, "learning_rate": 5.006586345381527e-06, "loss": 0.1219, "step": 62700 },
{ "epoch": 1.109592, "grad_norm": 3.3897037506103516, "learning_rate": 4.998554216867471e-06, "loss": 0.109, "step": 62800 },
{ "epoch": 1.110392, "grad_norm": 5.085102558135986, "learning_rate": 4.990522088353414e-06, "loss": 0.1182, "step": 62900 },
{ "epoch": 1.111192, "grad_norm": 6.255206108093262, "learning_rate": 4.982489959839358e-06, "loss": 0.1228, "step": 63000 },
{ "epoch": 1.111992, "grad_norm": 5.1899309158325195, "learning_rate": 4.974457831325302e-06, "loss": 0.1158, "step": 63100 },
{ "epoch": 1.112792, "grad_norm": 7.743951320648193, "learning_rate": 4.966425702811246e-06, "loss": 0.1247, "step": 63200 },
{ "epoch": 1.113592, "grad_norm": 4.700534343719482, "learning_rate": 4.958393574297189e-06, "loss": 0.1152, "step": 63300 },
{ "epoch": 1.114392, "grad_norm": 5.209968090057373, "learning_rate": 4.950361445783133e-06, "loss": 0.1098, "step": 63400 },
{ "epoch": 1.115192, "grad_norm": 6.524351119995117, "learning_rate": 4.9423293172690766e-06, "loss": 0.1073, "step": 63500 },
{ "epoch": 1.115992, "grad_norm": 8.537145614624023, "learning_rate": 4.9342971887550205e-06, "loss": 0.1137, "step": 63600 },
{ "epoch": 1.116792, "grad_norm": 8.046852111816406, "learning_rate": 4.926265060240964e-06, "loss": 0.1059, "step": 63700 },
{ "epoch": 1.117592, "grad_norm": 6.976133346557617, "learning_rate": 4.9182329317269075e-06, "loss": 0.1238, "step": 63800 },
{ "epoch": 1.118392, "grad_norm": 3.892697334289551, "learning_rate": 4.910200803212851e-06, "loss": 0.1257, "step": 63900 },
{ "epoch": 1.119192, "grad_norm": 4.94338321685791, "learning_rate": 4.902168674698795e-06, "loss": 0.1004, "step": 64000 },
{ "epoch": 1.119192, "eval_test1_cer": 0.046203973411009035, "eval_test1_cer_norm": 0.031037049487358033, "eval_test1_loss": 0.20126041769981384, "eval_test1_runtime": 1185.1944, "eval_test1_samples_per_second": 2.109, "eval_test1_steps_per_second": 0.527, "eval_test1_wer": 0.1458934662818158, "eval_test1_wer_norm": 0.08451445103597417, "step": 64000 },
{ "epoch": 1.119192, "eval_test2_cer": 0.09468212192481427, "eval_test2_cer_norm": 0.07436764022311744, "eval_test2_loss": 0.36592334508895874, "eval_test2_runtime": 1203.0799, "eval_test2_samples_per_second": 2.078, "eval_test2_steps_per_second": 0.519, "eval_test2_wer": 0.21420805676356144, "eval_test2_wer_norm": 0.14769080907632362, "step": 64000 },
|
|
    {
      "epoch": 1.119992,
      "grad_norm": 7.434136867523193,
      "learning_rate": 4.894136546184739e-06,
      "loss": 0.1235,
      "step": 64100
    },
    {
      "epoch": 1.120792,
      "grad_norm": 4.429540157318115,
      "learning_rate": 4.886104417670683e-06,
      "loss": 0.1095,
      "step": 64200
    },
    {
      "epoch": 1.121592,
      "grad_norm": 3.018216133117676,
      "learning_rate": 4.878072289156627e-06,
      "loss": 0.1165,
      "step": 64300
    },
    {
      "epoch": 1.122392,
      "grad_norm": 3.923384189605713,
      "learning_rate": 4.870040160642571e-06,
      "loss": 0.1174,
      "step": 64400
    },
    {
      "epoch": 1.123192,
      "grad_norm": 9.313278198242188,
      "learning_rate": 4.862008032128515e-06,
      "loss": 0.1145,
      "step": 64500
    },
    {
      "epoch": 1.123992,
      "grad_norm": 4.272242069244385,
      "learning_rate": 4.853975903614459e-06,
      "loss": 0.1187,
      "step": 64600
    },
    {
      "epoch": 1.124792,
      "grad_norm": 5.922658920288086,
      "learning_rate": 4.845943775100402e-06,
      "loss": 0.1142,
      "step": 64700
    },
    {
      "epoch": 1.125592,
      "grad_norm": 6.251376628875732,
      "learning_rate": 4.837911646586346e-06,
      "loss": 0.115,
      "step": 64800
    },
    {
      "epoch": 1.126392,
      "grad_norm": 7.813810348510742,
      "learning_rate": 4.82987951807229e-06,
      "loss": 0.119,
      "step": 64900
    },
    {
      "epoch": 1.127192,
      "grad_norm": 2.6300604343414307,
      "learning_rate": 4.821847389558234e-06,
      "loss": 0.1097,
      "step": 65000
    },
    {
      "epoch": 1.1279919999999999,
      "grad_norm": 3.9435408115386963,
      "learning_rate": 4.813815261044177e-06,
      "loss": 0.1368,
      "step": 65100
    },
    {
      "epoch": 1.128792,
      "grad_norm": 3.7023379802703857,
      "learning_rate": 4.805783132530121e-06,
      "loss": 0.1216,
      "step": 65200
    },
    {
      "epoch": 1.129592,
      "grad_norm": 3.5150551795959473,
      "learning_rate": 4.797751004016065e-06,
      "loss": 0.1188,
      "step": 65300
    },
    {
      "epoch": 1.130392,
      "grad_norm": 8.510544776916504,
      "learning_rate": 4.7897188755020085e-06,
      "loss": 0.1071,
      "step": 65400
    },
    {
      "epoch": 1.131192,
      "grad_norm": 4.728481292724609,
      "learning_rate": 4.7816867469879524e-06,
      "loss": 0.1149,
      "step": 65500
    },
    {
      "epoch": 1.1319919999999999,
      "grad_norm": 2.2337489128112793,
      "learning_rate": 4.7736546184738955e-06,
      "loss": 0.1155,
      "step": 65600
    },
    {
      "epoch": 1.132792,
      "grad_norm": 8.161867141723633,
      "learning_rate": 4.765622489959839e-06,
      "loss": 0.115,
      "step": 65700
    },
    {
      "epoch": 1.133592,
      "grad_norm": 5.036279678344727,
      "learning_rate": 4.757590361445783e-06,
      "loss": 0.1316,
      "step": 65800
    },
    {
      "epoch": 1.134392,
      "grad_norm": 9.957592964172363,
      "learning_rate": 4.749558232931727e-06,
      "loss": 0.1127,
      "step": 65900
    },
    {
      "epoch": 1.135192,
      "grad_norm": 4.2074809074401855,
      "learning_rate": 4.741526104417671e-06,
      "loss": 0.1092,
      "step": 66000
    },
    {
      "epoch": 1.135992,
      "grad_norm": 3.1782147884368896,
      "learning_rate": 4.733493975903615e-06,
      "loss": 0.111,
      "step": 66100
    },
    {
      "epoch": 1.136792,
      "grad_norm": 3.371274709701538,
      "learning_rate": 4.725461847389559e-06,
      "loss": 0.1286,
      "step": 66200
    },
    {
      "epoch": 1.137592,
      "grad_norm": 3.8433542251586914,
      "learning_rate": 4.717429718875502e-06,
      "loss": 0.109,
      "step": 66300
    },
    {
      "epoch": 1.138392,
      "grad_norm": 5.162513256072998,
      "learning_rate": 4.709477911646586e-06,
      "loss": 0.1066,
      "step": 66400
    },
    {
      "epoch": 1.139192,
      "grad_norm": 3.1563637256622314,
      "learning_rate": 4.70144578313253e-06,
      "loss": 0.1252,
      "step": 66500
    },
    {
      "epoch": 1.139992,
      "grad_norm": 10.928020477294922,
      "learning_rate": 4.693413654618474e-06,
      "loss": 0.1255,
      "step": 66600
    },
    {
      "epoch": 1.140792,
      "grad_norm": 12.533303260803223,
      "learning_rate": 4.685381526104418e-06,
      "loss": 0.1141,
      "step": 66700
    },
    {
      "epoch": 1.141592,
      "grad_norm": 4.71325159072876,
      "learning_rate": 4.677349397590361e-06,
      "loss": 0.1074,
      "step": 66800
    },
    {
      "epoch": 1.142392,
      "grad_norm": 4.4170427322387695,
      "learning_rate": 4.669317269076305e-06,
      "loss": 0.1081,
      "step": 66900
    },
    {
      "epoch": 1.143192,
      "grad_norm": 7.2962965965271,
      "learning_rate": 4.661285140562249e-06,
      "loss": 0.1115,
      "step": 67000
    },
    {
      "epoch": 1.143992,
      "grad_norm": 6.532419204711914,
      "learning_rate": 4.653253012048193e-06,
      "loss": 0.1194,
      "step": 67100
    },
    {
      "epoch": 1.144792,
      "grad_norm": 9.143524169921875,
      "learning_rate": 4.645220883534137e-06,
      "loss": 0.1148,
      "step": 67200
    },
    {
      "epoch": 1.145592,
      "grad_norm": 2.3634395599365234,
      "learning_rate": 4.637188755020081e-06,
      "loss": 0.1322,
      "step": 67300
    },
    {
      "epoch": 1.146392,
      "grad_norm": 8.534736633300781,
      "learning_rate": 4.629156626506025e-06,
      "loss": 0.1033,
      "step": 67400
    },
    {
      "epoch": 1.147192,
      "grad_norm": 7.3824944496154785,
      "learning_rate": 4.621124497991969e-06,
      "loss": 0.1109,
      "step": 67500
    },
    {
      "epoch": 1.147992,
      "grad_norm": 3.094473123550415,
      "learning_rate": 4.613092369477913e-06,
      "loss": 0.1201,
      "step": 67600
    },
    {
      "epoch": 1.148792,
      "grad_norm": 4.593748569488525,
      "learning_rate": 4.605060240963856e-06,
      "loss": 0.1096,
      "step": 67700
    },
    {
      "epoch": 1.149592,
      "grad_norm": 4.944604396820068,
      "learning_rate": 4.5970281124498e-06,
      "loss": 0.1128,
      "step": 67800
    },
    {
      "epoch": 1.150392,
      "grad_norm": 6.725574493408203,
      "learning_rate": 4.5889959839357435e-06,
      "loss": 0.1129,
      "step": 67900
    },
    {
      "epoch": 1.151192,
      "grad_norm": 8.640876770019531,
      "learning_rate": 4.5809638554216874e-06,
      "loss": 0.0995,
      "step": 68000
    },
    {
      "epoch": 1.151192,
      "eval_test1_cer": 0.05561935170662484,
      "eval_test1_cer_norm": 0.03838931975892621,
      "eval_test1_loss": 0.19823457300662994,
      "eval_test1_runtime": 1196.2654,
      "eval_test1_samples_per_second": 2.09,
      "eval_test1_steps_per_second": 0.522,
      "eval_test1_wer": 0.15913000379019795,
      "eval_test1_wer_norm": 0.09649609865863994,
      "step": 68000
    },
    {
      "epoch": 1.151192,
      "eval_test2_cer": 0.09447679844700788,
      "eval_test2_cer_norm": 0.07438216609854353,
      "eval_test2_loss": 0.36025092005729675,
      "eval_test2_runtime": 1407.094,
      "eval_test2_samples_per_second": 1.777,
      "eval_test2_steps_per_second": 0.444,
      "eval_test2_wer": 0.21621080338750287,
      "eval_test2_wer_norm": 0.14837840935136373,
      "step": 68000
    }
  ],
  "logging_steps": 100,
  "max_steps": 125000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 4000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.7760453484544e+20,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}