diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": 1.0, "best_model_checkpoint": "./hubert-base-phoneme/checkpoint-892", - "epoch": 9.0, - "global_step": 8028, + "epoch": 19.0, + "global_step": 16948, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -4908,11 +4908,5463 @@ "eval_steps_per_second": 1.393, "eval_wer": 3.1538815462559584, "step": 8028 + }, + { + "epoch": 9.0, + "learning_rate": 2.5986100533130237e-05, + "loss": 0.0907, + "step": 8030 + }, + { + "epoch": 9.01, + "learning_rate": 2.598038842345773e-05, + "loss": 0.0646, + "step": 8040 + }, + { + "epoch": 9.02, + "learning_rate": 2.5974676313785227e-05, + "loss": 0.0668, + "step": 8050 + }, + { + "epoch": 9.04, + "learning_rate": 2.596896420411272e-05, + "loss": 0.0592, + "step": 8060 + }, + { + "epoch": 9.05, + "learning_rate": 2.5963252094440214e-05, + "loss": 0.0578, + "step": 8070 + }, + { + "epoch": 9.06, + "learning_rate": 2.595753998476771e-05, + "loss": 0.0817, + "step": 8080 + }, + { + "epoch": 9.07, + "learning_rate": 2.59518278750952e-05, + "loss": 0.0636, + "step": 8090 + }, + { + "epoch": 9.08, + "learning_rate": 2.5946115765422695e-05, + "loss": 0.06, + "step": 8100 + }, + { + "epoch": 9.09, + "learning_rate": 2.594040365575019e-05, + "loss": 0.0576, + "step": 8110 + }, + { + "epoch": 9.1, + "learning_rate": 2.5934691546077685e-05, + "loss": 0.0618, + "step": 8120 + }, + { + "epoch": 9.11, + "learning_rate": 2.592897943640518e-05, + "loss": 0.0834, + "step": 8130 + }, + { + "epoch": 9.13, + "learning_rate": 2.5923267326732672e-05, + "loss": 0.0598, + "step": 8140 + }, + { + "epoch": 9.14, + "learning_rate": 2.591755521706017e-05, + "loss": 0.0628, + "step": 8150 + }, + { + "epoch": 9.15, + "learning_rate": 2.5911843107387663e-05, + "loss": 0.0606, + "step": 8160 + }, + { + "epoch": 9.16, + "learning_rate": 2.5906130997715156e-05, + "loss": 0.0582, + "step": 8170 + }, + { + "epoch": 9.17, + "learning_rate": 2.5900418888042653e-05, + "loss": 0.0812, + "step": 8180 + }, + { + "epoch": 9.18, + "learning_rate": 2.5894706778370147e-05, + "loss": 0.0663, + "step": 8190 + }, + { + "epoch": 9.19, + "learning_rate": 2.588899466869764e-05, + "loss": 0.0632, + "step": 8200 + }, + { + "epoch": 9.2, + "learning_rate": 2.5883282559025134e-05, + "loss": 0.0589, + "step": 8210 + }, + { + "epoch": 9.22, + "learning_rate": 2.5877570449352628e-05, + "loss": 0.0568, + "step": 8220 + }, + { + "epoch": 9.23, + "learning_rate": 2.587185833968012e-05, + "loss": 0.0752, + "step": 8230 + }, + { + "epoch": 9.24, + "learning_rate": 2.5866146230007615e-05, + "loss": 0.0558, + "step": 8240 + }, + { + "epoch": 9.25, + "learning_rate": 2.586043412033511e-05, + "loss": 0.0587, + "step": 8250 + }, + { + "epoch": 9.26, + "learning_rate": 2.5854722010662605e-05, + "loss": 0.0583, + "step": 8260 + }, + { + "epoch": 9.27, + "learning_rate": 2.58490099009901e-05, + "loss": 0.0648, + "step": 8270 + }, + { + "epoch": 9.28, + "learning_rate": 2.5843297791317596e-05, + "loss": 0.0827, + "step": 8280 + }, + { + "epoch": 9.29, + "learning_rate": 2.583758568164509e-05, + "loss": 0.0645, + "step": 8290 + }, + { + "epoch": 9.3, + "learning_rate": 2.5831873571972583e-05, + "loss": 0.0614, + "step": 8300 + }, + { + "epoch": 9.32, + "learning_rate": 2.5826161462300076e-05, + "loss": 0.0608, + "step": 8310 + }, + { + "epoch": 9.33, + "learning_rate": 2.5820449352627573e-05, + "loss": 0.0614, + "step": 8320 + }, + { + "epoch": 9.34, + "learning_rate": 2.5814737242955067e-05, + "loss": 0.0853, + "step": 8330 + }, + { + "epoch": 9.35, + "learning_rate": 2.580902513328256e-05, + "loss": 0.0625, + "step": 8340 + }, + { + "epoch": 9.36, + "learning_rate": 2.5803313023610057e-05, + "loss": 0.0641, + "step": 8350 + }, + { + "epoch": 9.37, + "learning_rate": 2.5797600913937547e-05, + "loss": 0.0672, + "step": 8360 + }, + { + "epoch": 9.38, + "learning_rate": 2.579188880426504e-05, + "loss": 0.057, + "step": 8370 + }, + { + "epoch": 9.39, + "learning_rate": 2.5786176694592534e-05, + "loss": 0.0781, + "step": 8380 + }, + { + "epoch": 9.41, + "learning_rate": 2.578046458492003e-05, + "loss": 0.0601, + "step": 8390 + }, + { + "epoch": 9.42, + "learning_rate": 2.5774752475247525e-05, + "loss": 0.06, + "step": 8400 + }, + { + "epoch": 9.43, + "learning_rate": 2.576904036557502e-05, + "loss": 0.0569, + "step": 8410 + }, + { + "epoch": 9.44, + "learning_rate": 2.5763328255902515e-05, + "loss": 0.0584, + "step": 8420 + }, + { + "epoch": 9.45, + "learning_rate": 2.575761614623001e-05, + "loss": 0.0775, + "step": 8430 + }, + { + "epoch": 9.46, + "learning_rate": 2.5751904036557502e-05, + "loss": 0.0594, + "step": 8440 + }, + { + "epoch": 9.47, + "learning_rate": 2.5746191926885e-05, + "loss": 0.0595, + "step": 8450 + }, + { + "epoch": 9.48, + "learning_rate": 2.5740479817212493e-05, + "loss": 0.0571, + "step": 8460 + }, + { + "epoch": 9.5, + "learning_rate": 2.5734767707539986e-05, + "loss": 0.0645, + "step": 8470 + }, + { + "epoch": 9.51, + "learning_rate": 2.5729626808834732e-05, + "loss": 0.075, + "step": 8480 + }, + { + "epoch": 9.52, + "learning_rate": 2.5723914699162226e-05, + "loss": 0.0607, + "step": 8490 + }, + { + "epoch": 9.53, + "learning_rate": 2.571820258948972e-05, + "loss": 0.0608, + "step": 8500 + }, + { + "epoch": 9.54, + "learning_rate": 2.5712490479817213e-05, + "loss": 0.0574, + "step": 8510 + }, + { + "epoch": 9.55, + "learning_rate": 2.570677837014471e-05, + "loss": 0.0662, + "step": 8520 + }, + { + "epoch": 9.56, + "learning_rate": 2.57010662604722e-05, + "loss": 0.0787, + "step": 8530 + }, + { + "epoch": 9.57, + "learning_rate": 2.5695354150799694e-05, + "loss": 0.0617, + "step": 8540 + }, + { + "epoch": 9.59, + "learning_rate": 2.568964204112719e-05, + "loss": 0.0632, + "step": 8550 + }, + { + "epoch": 9.6, + "learning_rate": 2.5683929931454684e-05, + "loss": 0.0571, + "step": 8560 + }, + { + "epoch": 9.61, + "learning_rate": 2.5678217821782178e-05, + "loss": 0.0552, + "step": 8570 + }, + { + "epoch": 9.62, + "learning_rate": 2.5672505712109674e-05, + "loss": 0.0764, + "step": 8580 + }, + { + "epoch": 9.63, + "learning_rate": 2.5666793602437168e-05, + "loss": 0.0647, + "step": 8590 + }, + { + "epoch": 9.64, + "learning_rate": 2.566108149276466e-05, + "loss": 0.0597, + "step": 8600 + }, + { + "epoch": 9.65, + "learning_rate": 2.5655369383092155e-05, + "loss": 0.0572, + "step": 8610 + }, + { + "epoch": 9.66, + "learning_rate": 2.5649657273419652e-05, + "loss": 0.0554, + "step": 8620 + }, + { + "epoch": 9.67, + "learning_rate": 2.5643945163747146e-05, + "loss": 0.075, + "step": 8630 + }, + { + "epoch": 9.69, + "learning_rate": 2.563823305407464e-05, + "loss": 0.0681, + "step": 8640 + }, + { + "epoch": 9.7, + "learning_rate": 2.5632520944402136e-05, + "loss": 0.0598, + "step": 8650 + }, + { + "epoch": 9.71, + "learning_rate": 2.5626808834729626e-05, + "loss": 0.0604, + "step": 8660 + }, + { + "epoch": 9.72, + "learning_rate": 2.562109672505712e-05, + "loss": 0.0597, + "step": 8670 + }, + { + "epoch": 9.73, + "learning_rate": 2.5615384615384613e-05, + "loss": 0.0812, + "step": 8680 + }, + { + "epoch": 9.74, + "learning_rate": 2.560967250571211e-05, + "loss": 0.0631, + "step": 8690 + }, + { + "epoch": 9.75, + "learning_rate": 2.5603960396039604e-05, + "loss": 0.0569, + "step": 8700 + }, + { + "epoch": 9.76, + "learning_rate": 2.5598248286367097e-05, + "loss": 0.0608, + "step": 8710 + }, + { + "epoch": 9.78, + "learning_rate": 2.5592536176694594e-05, + "loss": 0.0584, + "step": 8720 + }, + { + "epoch": 9.79, + "learning_rate": 2.5586824067022088e-05, + "loss": 0.0812, + "step": 8730 + }, + { + "epoch": 9.8, + "learning_rate": 2.558111195734958e-05, + "loss": 0.0588, + "step": 8740 + }, + { + "epoch": 9.81, + "learning_rate": 2.5575399847677078e-05, + "loss": 0.059, + "step": 8750 + }, + { + "epoch": 9.82, + "learning_rate": 2.5569687738004572e-05, + "loss": 0.0576, + "step": 8760 + }, + { + "epoch": 9.83, + "learning_rate": 2.5563975628332065e-05, + "loss": 0.061, + "step": 8770 + }, + { + "epoch": 9.84, + "learning_rate": 2.555826351865956e-05, + "loss": 0.0924, + "step": 8780 + }, + { + "epoch": 9.85, + "learning_rate": 2.5552551408987052e-05, + "loss": 0.0619, + "step": 8790 + }, + { + "epoch": 9.87, + "learning_rate": 2.5546839299314546e-05, + "loss": 0.0611, + "step": 8800 + }, + { + "epoch": 9.88, + "learning_rate": 2.554112718964204e-05, + "loss": 0.0596, + "step": 8810 + }, + { + "epoch": 9.89, + "learning_rate": 2.5535415079969536e-05, + "loss": 0.0612, + "step": 8820 + }, + { + "epoch": 9.9, + "learning_rate": 2.552970297029703e-05, + "loss": 0.0751, + "step": 8830 + }, + { + "epoch": 9.91, + "learning_rate": 2.5523990860624524e-05, + "loss": 0.0613, + "step": 8840 + }, + { + "epoch": 9.92, + "learning_rate": 2.551827875095202e-05, + "loss": 0.058, + "step": 8850 + }, + { + "epoch": 9.93, + "learning_rate": 2.5512566641279514e-05, + "loss": 0.0593, + "step": 8860 + }, + { + "epoch": 9.94, + "learning_rate": 2.5506854531607008e-05, + "loss": 0.0623, + "step": 8870 + }, + { + "epoch": 9.96, + "learning_rate": 2.55011424219345e-05, + "loss": 0.0769, + "step": 8880 + }, + { + "epoch": 9.97, + "learning_rate": 2.5495430312261998e-05, + "loss": 0.0626, + "step": 8890 + }, + { + "epoch": 9.98, + "learning_rate": 2.548971820258949e-05, + "loss": 0.0552, + "step": 8900 + }, + { + "epoch": 9.99, + "learning_rate": 2.5484006092916985e-05, + "loss": 0.0588, + "step": 8910 + }, + { + "epoch": 10.0, + "learning_rate": 2.5478293983244482e-05, + "loss": 0.0762, + "step": 8920 + }, + { + "epoch": 10.0, + "eval_cer": 1.1474645395666225, + "eval_loss": 0.09519515931606293, + "eval_runtime": 479.4355, + "eval_samples_per_second": 11.103, + "eval_steps_per_second": 1.389, + "eval_wer": 3.1563187687162646, + "step": 8920 + }, + { + "epoch": 10.01, + "learning_rate": 2.5472581873571972e-05, + "loss": 0.0573, + "step": 8930 + }, + { + "epoch": 10.02, + "learning_rate": 2.5466869763899466e-05, + "loss": 0.054, + "step": 8940 + }, + { + "epoch": 10.03, + "learning_rate": 2.546115765422696e-05, + "loss": 0.0527, + "step": 8950 + }, + { + "epoch": 10.04, + "learning_rate": 2.5455445544554456e-05, + "loss": 0.0543, + "step": 8960 + }, + { + "epoch": 10.06, + "learning_rate": 2.544973343488195e-05, + "loss": 0.0729, + "step": 8970 + }, + { + "epoch": 10.07, + "learning_rate": 2.5444021325209443e-05, + "loss": 0.0576, + "step": 8980 + }, + { + "epoch": 10.08, + "learning_rate": 2.543830921553694e-05, + "loss": 0.053, + "step": 8990 + }, + { + "epoch": 10.09, + "learning_rate": 2.5432597105864434e-05, + "loss": 0.0601, + "step": 9000 + }, + { + "epoch": 10.1, + "learning_rate": 2.5426884996191927e-05, + "loss": 0.0484, + "step": 9010 + }, + { + "epoch": 10.11, + "learning_rate": 2.5421172886519424e-05, + "loss": 0.0735, + "step": 9020 + }, + { + "epoch": 10.12, + "learning_rate": 2.5415460776846918e-05, + "loss": 0.0543, + "step": 9030 + }, + { + "epoch": 10.13, + "learning_rate": 2.540974866717441e-05, + "loss": 0.0568, + "step": 9040 + }, + { + "epoch": 10.15, + "learning_rate": 2.5404036557501905e-05, + "loss": 0.0491, + "step": 9050 + }, + { + "epoch": 10.16, + "learning_rate": 2.53983244478294e-05, + "loss": 0.0573, + "step": 9060 + }, + { + "epoch": 10.17, + "learning_rate": 2.5392612338156892e-05, + "loss": 0.0765, + "step": 9070 + }, + { + "epoch": 10.18, + "learning_rate": 2.5386900228484385e-05, + "loss": 0.0528, + "step": 9080 + }, + { + "epoch": 10.19, + "learning_rate": 2.5381188118811882e-05, + "loss": 0.0508, + "step": 9090 + }, + { + "epoch": 10.2, + "learning_rate": 2.5375476009139376e-05, + "loss": 0.052, + "step": 9100 + }, + { + "epoch": 10.21, + "learning_rate": 2.536976389946687e-05, + "loss": 0.0508, + "step": 9110 + }, + { + "epoch": 10.22, + "learning_rate": 2.5364051789794363e-05, + "loss": 0.0678, + "step": 9120 + }, + { + "epoch": 10.24, + "learning_rate": 2.535833968012186e-05, + "loss": 0.0586, + "step": 9130 + }, + { + "epoch": 10.25, + "learning_rate": 2.5352627570449354e-05, + "loss": 0.051, + "step": 9140 + }, + { + "epoch": 10.26, + "learning_rate": 2.5346915460776847e-05, + "loss": 0.0519, + "step": 9150 + }, + { + "epoch": 10.27, + "learning_rate": 2.5341203351104344e-05, + "loss": 0.0522, + "step": 9160 + }, + { + "epoch": 10.28, + "learning_rate": 2.5335491241431838e-05, + "loss": 0.0844, + "step": 9170 + }, + { + "epoch": 10.29, + "learning_rate": 2.532977913175933e-05, + "loss": 0.0579, + "step": 9180 + }, + { + "epoch": 10.3, + "learning_rate": 2.5324067022086825e-05, + "loss": 0.0562, + "step": 9190 + }, + { + "epoch": 10.31, + "learning_rate": 2.5318354912414318e-05, + "loss": 0.053, + "step": 9200 + }, + { + "epoch": 10.33, + "learning_rate": 2.5312642802741812e-05, + "loss": 0.049, + "step": 9210 + }, + { + "epoch": 10.34, + "learning_rate": 2.5306930693069305e-05, + "loss": 0.0668, + "step": 9220 + }, + { + "epoch": 10.35, + "learning_rate": 2.5301218583396802e-05, + "loss": 0.0574, + "step": 9230 + }, + { + "epoch": 10.36, + "learning_rate": 2.5295506473724296e-05, + "loss": 0.0583, + "step": 9240 + }, + { + "epoch": 10.37, + "learning_rate": 2.528979436405179e-05, + "loss": 0.052, + "step": 9250 + }, + { + "epoch": 10.38, + "learning_rate": 2.5284082254379286e-05, + "loss": 0.0482, + "step": 9260 + }, + { + "epoch": 10.39, + "learning_rate": 2.527837014470678e-05, + "loss": 0.0638, + "step": 9270 + }, + { + "epoch": 10.4, + "learning_rate": 2.5272658035034273e-05, + "loss": 0.0563, + "step": 9280 + }, + { + "epoch": 10.41, + "learning_rate": 2.5266945925361767e-05, + "loss": 0.0573, + "step": 9290 + }, + { + "epoch": 10.43, + "learning_rate": 2.5261233815689264e-05, + "loss": 0.0533, + "step": 9300 + }, + { + "epoch": 10.44, + "learning_rate": 2.5255521706016757e-05, + "loss": 0.0486, + "step": 9310 + }, + { + "epoch": 10.45, + "learning_rate": 2.5249809596344247e-05, + "loss": 0.0642, + "step": 9320 + }, + { + "epoch": 10.46, + "learning_rate": 2.5244097486671744e-05, + "loss": 0.064, + "step": 9330 + }, + { + "epoch": 10.47, + "learning_rate": 2.5238385376999238e-05, + "loss": 0.0571, + "step": 9340 + }, + { + "epoch": 10.48, + "learning_rate": 2.523267326732673e-05, + "loss": 0.0539, + "step": 9350 + }, + { + "epoch": 10.49, + "learning_rate": 2.522696115765423e-05, + "loss": 0.0541, + "step": 9360 + }, + { + "epoch": 10.5, + "learning_rate": 2.5221249047981722e-05, + "loss": 0.0696, + "step": 9370 + }, + { + "epoch": 10.52, + "learning_rate": 2.5215536938309215e-05, + "loss": 0.0553, + "step": 9380 + }, + { + "epoch": 10.53, + "learning_rate": 2.520982482863671e-05, + "loss": 0.05, + "step": 9390 + }, + { + "epoch": 10.54, + "learning_rate": 2.5204112718964206e-05, + "loss": 0.055, + "step": 9400 + }, + { + "epoch": 10.55, + "learning_rate": 2.51984006092917e-05, + "loss": 0.0487, + "step": 9410 + }, + { + "epoch": 10.56, + "learning_rate": 2.5192688499619193e-05, + "loss": 0.0664, + "step": 9420 + }, + { + "epoch": 10.57, + "learning_rate": 2.518697638994669e-05, + "loss": 0.053, + "step": 9430 + }, + { + "epoch": 10.58, + "learning_rate": 2.5181264280274184e-05, + "loss": 0.053, + "step": 9440 + }, + { + "epoch": 10.59, + "learning_rate": 2.5175552170601677e-05, + "loss": 0.0511, + "step": 9450 + }, + { + "epoch": 10.61, + "learning_rate": 2.516984006092917e-05, + "loss": 0.0548, + "step": 9460 + }, + { + "epoch": 10.62, + "learning_rate": 2.5164127951256664e-05, + "loss": 0.0709, + "step": 9470 + }, + { + "epoch": 10.63, + "learning_rate": 2.5158415841584158e-05, + "loss": 0.0526, + "step": 9480 + }, + { + "epoch": 10.64, + "learning_rate": 2.515270373191165e-05, + "loss": 0.0544, + "step": 9490 + }, + { + "epoch": 10.65, + "learning_rate": 2.5146991622239148e-05, + "loss": 0.0489, + "step": 9500 + }, + { + "epoch": 10.66, + "learning_rate": 2.5141279512566642e-05, + "loss": 0.0502, + "step": 9510 + }, + { + "epoch": 10.67, + "learning_rate": 2.5135567402894135e-05, + "loss": 0.0686, + "step": 9520 + }, + { + "epoch": 10.68, + "learning_rate": 2.5129855293221632e-05, + "loss": 0.0536, + "step": 9530 + }, + { + "epoch": 10.7, + "learning_rate": 2.5124143183549126e-05, + "loss": 0.0539, + "step": 9540 + }, + { + "epoch": 10.71, + "learning_rate": 2.511843107387662e-05, + "loss": 0.0538, + "step": 9550 + }, + { + "epoch": 10.72, + "learning_rate": 2.5112718964204113e-05, + "loss": 0.0586, + "step": 9560 + }, + { + "epoch": 10.73, + "learning_rate": 2.510700685453161e-05, + "loss": 0.0722, + "step": 9570 + }, + { + "epoch": 10.74, + "learning_rate": 2.5101294744859103e-05, + "loss": 0.0566, + "step": 9580 + }, + { + "epoch": 10.75, + "learning_rate": 2.5095582635186593e-05, + "loss": 0.0494, + "step": 9590 + }, + { + "epoch": 10.76, + "learning_rate": 2.508987052551409e-05, + "loss": 0.0499, + "step": 9600 + }, + { + "epoch": 10.77, + "learning_rate": 2.5084158415841584e-05, + "loss": 0.0551, + "step": 9610 + }, + { + "epoch": 10.78, + "learning_rate": 2.5078446306169077e-05, + "loss": 0.0687, + "step": 9620 + }, + { + "epoch": 10.8, + "learning_rate": 2.5072734196496574e-05, + "loss": 0.0553, + "step": 9630 + }, + { + "epoch": 10.81, + "learning_rate": 2.5067022086824068e-05, + "loss": 0.0553, + "step": 9640 + }, + { + "epoch": 10.82, + "learning_rate": 2.506130997715156e-05, + "loss": 0.0497, + "step": 9650 + }, + { + "epoch": 10.83, + "learning_rate": 2.5055597867479055e-05, + "loss": 0.0544, + "step": 9660 + }, + { + "epoch": 10.84, + "learning_rate": 2.5049885757806552e-05, + "loss": 0.065, + "step": 9670 + }, + { + "epoch": 10.85, + "learning_rate": 2.5044173648134045e-05, + "loss": 0.0585, + "step": 9680 + }, + { + "epoch": 10.86, + "learning_rate": 2.503846153846154e-05, + "loss": 0.0581, + "step": 9690 + }, + { + "epoch": 10.87, + "learning_rate": 2.5032749428789036e-05, + "loss": 0.0513, + "step": 9700 + }, + { + "epoch": 10.89, + "learning_rate": 2.502703731911653e-05, + "loss": 0.0556, + "step": 9710 + }, + { + "epoch": 10.9, + "learning_rate": 2.5021896420411272e-05, + "loss": 0.0687, + "step": 9720 + }, + { + "epoch": 10.91, + "learning_rate": 2.501618431073877e-05, + "loss": 0.0581, + "step": 9730 + }, + { + "epoch": 10.92, + "learning_rate": 2.5010472201066262e-05, + "loss": 0.0531, + "step": 9740 + }, + { + "epoch": 10.93, + "learning_rate": 2.5004760091393756e-05, + "loss": 0.0529, + "step": 9750 + }, + { + "epoch": 10.94, + "learning_rate": 2.499904798172125e-05, + "loss": 0.0519, + "step": 9760 + }, + { + "epoch": 10.95, + "learning_rate": 2.4993335872048743e-05, + "loss": 0.0767, + "step": 9770 + }, + { + "epoch": 10.96, + "learning_rate": 2.4987623762376237e-05, + "loss": 0.0516, + "step": 9780 + }, + { + "epoch": 10.98, + "learning_rate": 2.498191165270373e-05, + "loss": 0.0512, + "step": 9790 + }, + { + "epoch": 10.99, + "learning_rate": 2.4976199543031227e-05, + "loss": 0.0522, + "step": 9800 + }, + { + "epoch": 11.0, + "learning_rate": 2.497048743335872e-05, + "loss": 0.0594, + "step": 9810 + }, + { + "epoch": 11.0, + "eval_cer": 1.1526371870005327, + "eval_loss": 0.09285453706979752, + "eval_runtime": 491.1637, + "eval_samples_per_second": 10.838, + "eval_steps_per_second": 1.356, + "eval_wer": 3.1543626807869574, + "step": 9812 + }, + { + "epoch": 11.01, + "learning_rate": 2.4964775323686214e-05, + "loss": 0.0638, + "step": 9820 + }, + { + "epoch": 11.02, + "learning_rate": 2.495906321401371e-05, + "loss": 0.049, + "step": 9830 + }, + { + "epoch": 11.03, + "learning_rate": 2.4953351104341205e-05, + "loss": 0.0433, + "step": 9840 + }, + { + "epoch": 11.04, + "learning_rate": 2.4947638994668698e-05, + "loss": 0.051, + "step": 9850 + }, + { + "epoch": 11.05, + "learning_rate": 2.4941926884996192e-05, + "loss": 0.0523, + "step": 9860 + }, + { + "epoch": 11.07, + "learning_rate": 2.493621477532369e-05, + "loss": 0.0588, + "step": 9870 + }, + { + "epoch": 11.08, + "learning_rate": 2.4930502665651182e-05, + "loss": 0.0512, + "step": 9880 + }, + { + "epoch": 11.09, + "learning_rate": 2.4924790555978676e-05, + "loss": 0.0471, + "step": 9890 + }, + { + "epoch": 11.1, + "learning_rate": 2.491907844630617e-05, + "loss": 0.0478, + "step": 9900 + }, + { + "epoch": 11.11, + "learning_rate": 2.4913366336633663e-05, + "loss": 0.051, + "step": 9910 + }, + { + "epoch": 11.12, + "learning_rate": 2.4907654226961156e-05, + "loss": 0.0616, + "step": 9920 + }, + { + "epoch": 11.13, + "learning_rate": 2.4901942117288653e-05, + "loss": 0.0483, + "step": 9930 + }, + { + "epoch": 11.14, + "learning_rate": 2.4896230007616147e-05, + "loss": 0.0453, + "step": 9940 + }, + { + "epoch": 11.15, + "learning_rate": 2.489051789794364e-05, + "loss": 0.0467, + "step": 9950 + }, + { + "epoch": 11.17, + "learning_rate": 2.4884805788271134e-05, + "loss": 0.0505, + "step": 9960 + }, + { + "epoch": 11.18, + "learning_rate": 2.487909367859863e-05, + "loss": 0.0609, + "step": 9970 + }, + { + "epoch": 11.19, + "learning_rate": 2.4873381568926124e-05, + "loss": 0.0495, + "step": 9980 + }, + { + "epoch": 11.2, + "learning_rate": 2.4867669459253618e-05, + "loss": 0.0429, + "step": 9990 + }, + { + "epoch": 11.21, + "learning_rate": 2.4861957349581115e-05, + "loss": 0.0452, + "step": 10000 + }, + { + "epoch": 11.22, + "learning_rate": 2.485624523990861e-05, + "loss": 0.0488, + "step": 10010 + }, + { + "epoch": 11.23, + "learning_rate": 2.4850533130236102e-05, + "loss": 0.0612, + "step": 10020 + }, + { + "epoch": 11.24, + "learning_rate": 2.4844821020563596e-05, + "loss": 0.047, + "step": 10030 + }, + { + "epoch": 11.26, + "learning_rate": 2.483910891089109e-05, + "loss": 0.0481, + "step": 10040 + }, + { + "epoch": 11.27, + "learning_rate": 2.4833396801218583e-05, + "loss": 0.0498, + "step": 10050 + }, + { + "epoch": 11.28, + "learning_rate": 2.4827684691546076e-05, + "loss": 0.0472, + "step": 10060 + }, + { + "epoch": 11.29, + "learning_rate": 2.4821972581873573e-05, + "loss": 0.0591, + "step": 10070 + }, + { + "epoch": 11.3, + "learning_rate": 2.4816260472201067e-05, + "loss": 0.05, + "step": 10080 + }, + { + "epoch": 11.31, + "learning_rate": 2.481054836252856e-05, + "loss": 0.0465, + "step": 10090 + }, + { + "epoch": 11.32, + "learning_rate": 2.4804836252856057e-05, + "loss": 0.0506, + "step": 10100 + }, + { + "epoch": 11.33, + "learning_rate": 2.479912414318355e-05, + "loss": 0.0607, + "step": 10110 + }, + { + "epoch": 11.35, + "learning_rate": 2.4793412033511044e-05, + "loss": 0.0623, + "step": 10120 + }, + { + "epoch": 11.36, + "learning_rate": 2.4787699923838538e-05, + "loss": 0.0503, + "step": 10130 + }, + { + "epoch": 11.37, + "learning_rate": 2.4781987814166035e-05, + "loss": 0.044, + "step": 10140 + }, + { + "epoch": 11.38, + "learning_rate": 2.4776275704493528e-05, + "loss": 0.0453, + "step": 10150 + }, + { + "epoch": 11.39, + "learning_rate": 2.477056359482102e-05, + "loss": 0.0502, + "step": 10160 + }, + { + "epoch": 11.4, + "learning_rate": 2.4764851485148515e-05, + "loss": 0.0692, + "step": 10170 + }, + { + "epoch": 11.41, + "learning_rate": 2.475913937547601e-05, + "loss": 0.0495, + "step": 10180 + }, + { + "epoch": 11.42, + "learning_rate": 2.4753427265803502e-05, + "loss": 0.0483, + "step": 10190 + }, + { + "epoch": 11.43, + "learning_rate": 2.4747715156131e-05, + "loss": 0.0435, + "step": 10200 + }, + { + "epoch": 11.45, + "learning_rate": 2.4742003046458493e-05, + "loss": 0.0551, + "step": 10210 + }, + { + "epoch": 11.46, + "learning_rate": 2.4736290936785986e-05, + "loss": 0.0628, + "step": 10220 + }, + { + "epoch": 11.47, + "learning_rate": 2.473057882711348e-05, + "loss": 0.0467, + "step": 10230 + }, + { + "epoch": 11.48, + "learning_rate": 2.4724866717440977e-05, + "loss": 0.0434, + "step": 10240 + }, + { + "epoch": 11.49, + "learning_rate": 2.471915460776847e-05, + "loss": 0.0476, + "step": 10250 + }, + { + "epoch": 11.5, + "learning_rate": 2.4713442498095964e-05, + "loss": 0.0498, + "step": 10260 + }, + { + "epoch": 11.51, + "learning_rate": 2.470773038842346e-05, + "loss": 0.0684, + "step": 10270 + }, + { + "epoch": 11.52, + "learning_rate": 2.4702018278750954e-05, + "loss": 0.0489, + "step": 10280 + }, + { + "epoch": 11.54, + "learning_rate": 2.4696306169078445e-05, + "loss": 0.0498, + "step": 10290 + }, + { + "epoch": 11.55, + "learning_rate": 2.469059405940594e-05, + "loss": 0.047, + "step": 10300 + }, + { + "epoch": 11.56, + "learning_rate": 2.4684881949733435e-05, + "loss": 0.0549, + "step": 10310 + }, + { + "epoch": 11.57, + "learning_rate": 2.467916984006093e-05, + "loss": 0.0723, + "step": 10320 + }, + { + "epoch": 11.58, + "learning_rate": 2.4673457730388422e-05, + "loss": 0.0486, + "step": 10330 + }, + { + "epoch": 11.59, + "learning_rate": 2.466774562071592e-05, + "loss": 0.0489, + "step": 10340 + }, + { + "epoch": 11.6, + "learning_rate": 2.4662033511043413e-05, + "loss": 0.0464, + "step": 10350 + }, + { + "epoch": 11.61, + "learning_rate": 2.4656321401370906e-05, + "loss": 0.0459, + "step": 10360 + }, + { + "epoch": 11.63, + "learning_rate": 2.4650609291698403e-05, + "loss": 0.0543, + "step": 10370 + }, + { + "epoch": 11.64, + "learning_rate": 2.4644897182025897e-05, + "loss": 0.0459, + "step": 10380 + }, + { + "epoch": 11.65, + "learning_rate": 2.463918507235339e-05, + "loss": 0.0442, + "step": 10390 + }, + { + "epoch": 11.66, + "learning_rate": 2.4633472962680884e-05, + "loss": 0.0499, + "step": 10400 + }, + { + "epoch": 11.67, + "learning_rate": 2.462776085300838e-05, + "loss": 0.0504, + "step": 10410 + }, + { + "epoch": 11.68, + "learning_rate": 2.4622048743335874e-05, + "loss": 0.0571, + "step": 10420 + }, + { + "epoch": 11.69, + "learning_rate": 2.4616336633663364e-05, + "loss": 0.0461, + "step": 10430 + }, + { + "epoch": 11.7, + "learning_rate": 2.461062452399086e-05, + "loss": 0.0462, + "step": 10440 + }, + { + "epoch": 11.72, + "learning_rate": 2.4604912414318355e-05, + "loss": 0.044, + "step": 10450 + }, + { + "epoch": 11.73, + "learning_rate": 2.459920030464585e-05, + "loss": 0.0526, + "step": 10460 + }, + { + "epoch": 11.74, + "learning_rate": 2.4593488194973345e-05, + "loss": 0.0586, + "step": 10470 + }, + { + "epoch": 11.75, + "learning_rate": 2.458777608530084e-05, + "loss": 0.047, + "step": 10480 + }, + { + "epoch": 11.76, + "learning_rate": 2.4582063975628332e-05, + "loss": 0.0472, + "step": 10490 + }, + { + "epoch": 11.77, + "learning_rate": 2.4576351865955826e-05, + "loss": 0.0444, + "step": 10500 + }, + { + "epoch": 11.78, + "learning_rate": 2.4570639756283323e-05, + "loss": 0.0503, + "step": 10510 + }, + { + "epoch": 11.79, + "learning_rate": 2.4564927646610816e-05, + "loss": 0.061, + "step": 10520 + }, + { + "epoch": 11.8, + "learning_rate": 2.455921553693831e-05, + "loss": 0.0464, + "step": 10530 + }, + { + "epoch": 11.82, + "learning_rate": 2.4553503427265807e-05, + "loss": 0.0472, + "step": 10540 + }, + { + "epoch": 11.83, + "learning_rate": 2.45477913175933e-05, + "loss": 0.0465, + "step": 10550 + }, + { + "epoch": 11.84, + "learning_rate": 2.454207920792079e-05, + "loss": 0.054, + "step": 10560 + }, + { + "epoch": 11.85, + "learning_rate": 2.4536367098248284e-05, + "loss": 0.0571, + "step": 10570 + }, + { + "epoch": 11.86, + "learning_rate": 2.453065498857578e-05, + "loss": 0.0489, + "step": 10580 + }, + { + "epoch": 11.87, + "learning_rate": 2.4524942878903275e-05, + "loss": 0.0453, + "step": 10590 + }, + { + "epoch": 11.88, + "learning_rate": 2.4519230769230768e-05, + "loss": 0.0483, + "step": 10600 + }, + { + "epoch": 11.89, + "learning_rate": 2.4513518659558265e-05, + "loss": 0.0563, + "step": 10610 + }, + { + "epoch": 11.91, + "learning_rate": 2.450780654988576e-05, + "loss": 0.0554, + "step": 10620 + }, + { + "epoch": 11.92, + "learning_rate": 2.4502094440213252e-05, + "loss": 0.0479, + "step": 10630 + }, + { + "epoch": 11.93, + "learning_rate": 2.449638233054075e-05, + "loss": 0.0512, + "step": 10640 + }, + { + "epoch": 11.94, + "learning_rate": 2.4490670220868243e-05, + "loss": 0.0456, + "step": 10650 + }, + { + "epoch": 11.95, + "learning_rate": 2.4484958111195736e-05, + "loss": 0.0502, + "step": 10660 + }, + { + "epoch": 11.96, + "learning_rate": 2.447924600152323e-05, + "loss": 0.0567, + "step": 10670 + }, + { + "epoch": 11.97, + "learning_rate": 2.4473533891850727e-05, + "loss": 0.0463, + "step": 10680 + }, + { + "epoch": 11.98, + "learning_rate": 2.4467821782178217e-05, + "loss": 0.0477, + "step": 10690 + }, + { + "epoch": 12.0, + "learning_rate": 2.446210967250571e-05, + "loss": 0.0504, + "step": 10700 + }, + { + "epoch": 12.0, + "eval_cer": 1.1531699520511454, + "eval_loss": 0.08872469514608383, + "eval_runtime": 476.1443, + "eval_samples_per_second": 11.179, + "eval_steps_per_second": 1.399, + "eval_wer": 3.152756269866518, + "step": 10704 + }, + { + "epoch": 12.01, + "learning_rate": 2.4456397562833207e-05, + "loss": 0.0559, + "step": 10710 + }, + { + "epoch": 12.02, + "learning_rate": 2.44506854531607e-05, + "loss": 0.0396, + "step": 10720 + }, + { + "epoch": 12.03, + "learning_rate": 2.4444973343488194e-05, + "loss": 0.0422, + "step": 10730 + }, + { + "epoch": 12.04, + "learning_rate": 2.4439261233815688e-05, + "loss": 0.0436, + "step": 10740 + }, + { + "epoch": 12.05, + "learning_rate": 2.4433549124143185e-05, + "loss": 0.0423, + "step": 10750 + }, + { + "epoch": 12.06, + "learning_rate": 2.442783701447068e-05, + "loss": 0.0537, + "step": 10760 + }, + { + "epoch": 12.07, + "learning_rate": 2.4422124904798172e-05, + "loss": 0.0402, + "step": 10770 + }, + { + "epoch": 12.09, + "learning_rate": 2.441641279512567e-05, + "loss": 0.0493, + "step": 10780 + }, + { + "epoch": 12.1, + "learning_rate": 2.4410700685453162e-05, + "loss": 0.0427, + "step": 10790 + }, + { + "epoch": 12.11, + "learning_rate": 2.4404988575780656e-05, + "loss": 0.0411, + "step": 10800 + }, + { + "epoch": 12.12, + "learning_rate": 2.4399276466108153e-05, + "loss": 0.0606, + "step": 10810 + }, + { + "epoch": 12.13, + "learning_rate": 2.4393564356435643e-05, + "loss": 0.0416, + "step": 10820 + }, + { + "epoch": 12.14, + "learning_rate": 2.4387852246763137e-05, + "loss": 0.0472, + "step": 10830 + }, + { + "epoch": 12.15, + "learning_rate": 2.438214013709063e-05, + "loss": 0.0382, + "step": 10840 + }, + { + "epoch": 12.16, + "learning_rate": 2.4376428027418127e-05, + "loss": 0.049, + "step": 10850 + }, + { + "epoch": 12.17, + "learning_rate": 2.437071591774562e-05, + "loss": 0.0573, + "step": 10860 + }, + { + "epoch": 12.19, + "learning_rate": 2.4365003808073114e-05, + "loss": 0.0443, + "step": 10870 + }, + { + "epoch": 12.2, + "learning_rate": 2.435929169840061e-05, + "loss": 0.0412, + "step": 10880 + }, + { + "epoch": 12.21, + "learning_rate": 2.4353579588728105e-05, + "loss": 0.0424, + "step": 10890 + }, + { + "epoch": 12.22, + "learning_rate": 2.4347867479055598e-05, + "loss": 0.0381, + "step": 10900 + }, + { + "epoch": 12.23, + "learning_rate": 2.4342155369383095e-05, + "loss": 0.0534, + "step": 10910 + }, + { + "epoch": 12.24, + "learning_rate": 2.433644325971059e-05, + "loss": 0.0419, + "step": 10920 + }, + { + "epoch": 12.25, + "learning_rate": 2.4330731150038082e-05, + "loss": 0.0406, + "step": 10930 + }, + { + "epoch": 12.26, + "learning_rate": 2.4325019040365576e-05, + "loss": 0.0464, + "step": 10940 + }, + { + "epoch": 12.28, + "learning_rate": 2.4319306930693073e-05, + "loss": 0.0425, + "step": 10950 + }, + { + "epoch": 12.29, + "learning_rate": 2.4313594821020563e-05, + "loss": 0.0588, + "step": 10960 + }, + { + "epoch": 12.3, + "learning_rate": 2.4307882711348056e-05, + "loss": 0.0488, + "step": 10970 + }, + { + "epoch": 12.31, + "learning_rate": 2.4302170601675553e-05, + "loss": 0.0398, + "step": 10980 + }, + { + "epoch": 12.32, + "learning_rate": 2.4296458492003047e-05, + "loss": 0.041, + "step": 10990 + }, + { + "epoch": 12.33, + "learning_rate": 2.429074638233054e-05, + "loss": 0.0452, + "step": 11000 + }, + { + "epoch": 12.34, + "learning_rate": 2.4285034272658034e-05, + "loss": 0.0549, + "step": 11010 + }, + { + "epoch": 12.35, + "learning_rate": 2.427932216298553e-05, + "loss": 0.0437, + "step": 11020 + }, + { + "epoch": 12.37, + "learning_rate": 2.4273610053313024e-05, + "loss": 0.0442, + "step": 11030 + }, + { + "epoch": 12.38, + "learning_rate": 2.4267897943640518e-05, + "loss": 0.0439, + "step": 11040 + }, + { + "epoch": 12.39, + "learning_rate": 2.4262185833968015e-05, + "loss": 0.0427, + "step": 11050 + }, + { + "epoch": 12.4, + "learning_rate": 2.425647372429551e-05, + "loss": 0.0553, + "step": 11060 + }, + { + "epoch": 12.41, + "learning_rate": 2.4250761614623002e-05, + "loss": 0.0446, + "step": 11070 + }, + { + "epoch": 12.42, + "learning_rate": 2.42450495049505e-05, + "loss": 0.0431, + "step": 11080 + }, + { + "epoch": 12.43, + "learning_rate": 2.423933739527799e-05, + "loss": 0.0395, + "step": 11090 + }, + { + "epoch": 12.44, + "learning_rate": 2.4233625285605483e-05, + "loss": 0.0404, + "step": 11100 + }, + { + "epoch": 12.46, + "learning_rate": 2.4227913175932976e-05, + "loss": 0.0609, + "step": 11110 + }, + { + "epoch": 12.47, + "learning_rate": 2.4222201066260473e-05, + "loss": 0.0421, + "step": 11120 + }, + { + "epoch": 12.48, + "learning_rate": 2.4216488956587967e-05, + "loss": 0.0434, + "step": 11130 + }, + { + "epoch": 12.49, + "learning_rate": 2.421077684691546e-05, + "loss": 0.0408, + "step": 11140 + }, + { + "epoch": 12.5, + "learning_rate": 2.4205064737242957e-05, + "loss": 0.0465, + "step": 11150 + }, + { + "epoch": 12.51, + "learning_rate": 2.419935262757045e-05, + "loss": 0.0613, + "step": 11160 + }, + { + "epoch": 12.52, + "learning_rate": 2.4193640517897944e-05, + "loss": 0.044, + "step": 11170 + }, + { + "epoch": 12.53, + "learning_rate": 2.4187928408225438e-05, + "loss": 0.0432, + "step": 11180 + }, + { + "epoch": 12.54, + "learning_rate": 2.4182216298552935e-05, + "loss": 0.0447, + "step": 11190 + }, + { + "epoch": 12.56, + "learning_rate": 2.4176504188880428e-05, + "loss": 0.0429, + "step": 11200 + }, + { + "epoch": 12.57, + "learning_rate": 2.417079207920792e-05, + "loss": 0.0624, + "step": 11210 + }, + { + "epoch": 12.58, + "learning_rate": 2.4165079969535415e-05, + "loss": 0.0445, + "step": 11220 + }, + { + "epoch": 12.59, + "learning_rate": 2.415936785986291e-05, + "loss": 0.0418, + "step": 11230 + }, + { + "epoch": 12.6, + "learning_rate": 2.4153655750190402e-05, + "loss": 0.0407, + "step": 11240 + }, + { + "epoch": 12.61, + "learning_rate": 2.41479436405179e-05, + "loss": 0.041, + "step": 11250 + }, + { + "epoch": 12.62, + "learning_rate": 2.4142231530845393e-05, + "loss": 0.0577, + "step": 11260 + }, + { + "epoch": 12.63, + "learning_rate": 2.4136519421172886e-05, + "loss": 0.0477, + "step": 11270 + }, + { + "epoch": 12.65, + "learning_rate": 2.413080731150038e-05, + "loss": 0.0452, + "step": 11280 + }, + { + "epoch": 12.66, + "learning_rate": 2.4125095201827877e-05, + "loss": 0.0413, + "step": 11290 + }, + { + "epoch": 12.67, + "learning_rate": 2.411938309215537e-05, + "loss": 0.0425, + "step": 11300 + }, + { + "epoch": 12.68, + "learning_rate": 2.4113670982482864e-05, + "loss": 0.0569, + "step": 11310 + }, + { + "epoch": 12.69, + "learning_rate": 2.410795887281036e-05, + "loss": 0.042, + "step": 11320 + }, + { + "epoch": 12.7, + "learning_rate": 2.4102246763137854e-05, + "loss": 0.0392, + "step": 11330 + }, + { + "epoch": 12.71, + "learning_rate": 2.4096534653465348e-05, + "loss": 0.0426, + "step": 11340 + }, + { + "epoch": 12.72, + "learning_rate": 2.409082254379284e-05, + "loss": 0.042, + "step": 11350 + }, + { + "epoch": 12.74, + "learning_rate": 2.4085110434120335e-05, + "loss": 0.0594, + "step": 11360 + }, + { + "epoch": 12.75, + "learning_rate": 2.407939832444783e-05, + "loss": 0.0435, + "step": 11370 + }, + { + "epoch": 12.76, + "learning_rate": 2.4073686214775322e-05, + "loss": 0.0433, + "step": 11380 + }, + { + "epoch": 12.77, + "learning_rate": 2.406797410510282e-05, + "loss": 0.0406, + "step": 11390 + }, + { + "epoch": 12.78, + "learning_rate": 2.4062261995430313e-05, + "loss": 0.0412, + "step": 11400 + }, + { + "epoch": 12.79, + "learning_rate": 2.4056549885757806e-05, + "loss": 0.0668, + "step": 11410 + }, + { + "epoch": 12.8, + "learning_rate": 2.4050837776085303e-05, + "loss": 0.0466, + "step": 11420 + }, + { + "epoch": 12.81, + "learning_rate": 2.4045125666412797e-05, + "loss": 0.0409, + "step": 11430 + }, + { + "epoch": 12.83, + "learning_rate": 2.403941355674029e-05, + "loss": 0.0428, + "step": 11440 + }, + { + "epoch": 12.84, + "learning_rate": 2.4033701447067784e-05, + "loss": 0.0374, + "step": 11450 + }, + { + "epoch": 12.85, + "learning_rate": 2.402798933739528e-05, + "loss": 0.058, + "step": 11460 + }, + { + "epoch": 12.86, + "learning_rate": 2.4022277227722774e-05, + "loss": 0.0439, + "step": 11470 + }, + { + "epoch": 12.87, + "learning_rate": 2.4016565118050268e-05, + "loss": 0.0415, + "step": 11480 + }, + { + "epoch": 12.88, + "learning_rate": 2.401085300837776e-05, + "loss": 0.0393, + "step": 11490 + }, + { + "epoch": 12.89, + "learning_rate": 2.4005140898705255e-05, + "loss": 0.0458, + "step": 11500 + }, + { + "epoch": 12.9, + "learning_rate": 2.3999428789032748e-05, + "loss": 0.0599, + "step": 11510 + }, + { + "epoch": 12.91, + "learning_rate": 2.3993716679360245e-05, + "loss": 0.0424, + "step": 11520 + }, + { + "epoch": 12.93, + "learning_rate": 2.398800456968774e-05, + "loss": 0.0413, + "step": 11530 + }, + { + "epoch": 12.94, + "learning_rate": 2.3982292460015232e-05, + "loss": 0.0409, + "step": 11540 + }, + { + "epoch": 12.95, + "learning_rate": 2.3976580350342726e-05, + "loss": 0.0422, + "step": 11550 + }, + { + "epoch": 12.96, + "learning_rate": 2.3970868240670223e-05, + "loss": 0.0553, + "step": 11560 + }, + { + "epoch": 12.97, + "learning_rate": 2.3965156130997716e-05, + "loss": 0.0409, + "step": 11570 + }, + { + "epoch": 12.98, + "learning_rate": 2.395944402132521e-05, + "loss": 0.0471, + "step": 11580 + }, + { + "epoch": 12.99, + "learning_rate": 2.3953731911652707e-05, + "loss": 0.0433, + "step": 11590 + }, + { + "epoch": 13.0, + "eval_cer": 1.1588298845923233, + "eval_loss": 0.09152624756097794, + "eval_runtime": 476.6374, + "eval_samples_per_second": 11.168, + "eval_steps_per_second": 1.397, + "eval_wer": 3.144214160078454, + "step": 11596 + }, + { + "epoch": 13.0, + "learning_rate": 2.39480198019802e-05, + "loss": 0.0631, + "step": 11600 + }, + { + "epoch": 13.02, + "learning_rate": 2.3942307692307694e-05, + "loss": 0.0423, + "step": 11610 + }, + { + "epoch": 13.03, + "learning_rate": 2.3936595582635184e-05, + "loss": 0.0406, + "step": 11620 + }, + { + "epoch": 13.04, + "learning_rate": 2.393088347296268e-05, + "loss": 0.0331, + "step": 11630 + }, + { + "epoch": 13.05, + "learning_rate": 2.3925171363290174e-05, + "loss": 0.0361, + "step": 11640 + }, + { + "epoch": 13.06, + "learning_rate": 2.3919459253617668e-05, + "loss": 0.0504, + "step": 11650 + }, + { + "epoch": 13.07, + "learning_rate": 2.3913747143945165e-05, + "loss": 0.0377, + "step": 11660 + }, + { + "epoch": 13.08, + "learning_rate": 2.390803503427266e-05, + "loss": 0.0413, + "step": 11670 + }, + { + "epoch": 13.09, + "learning_rate": 2.3902322924600152e-05, + "loss": 0.0373, + "step": 11680 + }, + { + "epoch": 13.11, + "learning_rate": 2.389661081492765e-05, + "loss": 0.0412, + "step": 11690 + }, + { + "epoch": 13.12, + "learning_rate": 2.3890898705255143e-05, + "loss": 0.0531, + "step": 11700 + }, + { + "epoch": 13.13, + "learning_rate": 2.3885186595582636e-05, + "loss": 0.0446, + "step": 11710 + }, + { + "epoch": 13.14, + "learning_rate": 2.387947448591013e-05, + "loss": 0.0363, + "step": 11720 + }, + { + "epoch": 13.15, + "learning_rate": 2.3873762376237627e-05, + "loss": 0.0383, + "step": 11730 + }, + { + "epoch": 13.16, + "learning_rate": 2.386805026656512e-05, + "loss": 0.0377, + "step": 11740 + }, + { + "epoch": 13.17, + "learning_rate": 2.386233815689261e-05, + "loss": 0.0492, + "step": 11750 + }, + { + "epoch": 13.18, + "learning_rate": 2.3856626047220107e-05, + "loss": 0.0425, + "step": 11760 + }, + { + "epoch": 13.2, + "learning_rate": 2.38509139375476e-05, + "loss": 0.0391, + "step": 11770 + }, + { + "epoch": 13.21, + "learning_rate": 2.3845201827875094e-05, + "loss": 0.0381, + "step": 11780 + }, + { + "epoch": 13.22, + "learning_rate": 2.383948971820259e-05, + "loss": 0.0353, + "step": 11790 + }, + { + "epoch": 13.23, + "learning_rate": 2.3833777608530085e-05, + "loss": 0.0568, + "step": 11800 + }, + { + "epoch": 13.24, + "learning_rate": 2.3828065498857578e-05, + "loss": 0.041, + "step": 11810 + }, + { + "epoch": 13.25, + "learning_rate": 2.3822353389185072e-05, + "loss": 0.0406, + "step": 11820 + }, + { + "epoch": 13.26, + "learning_rate": 2.381664127951257e-05, + "loss": 0.0404, + "step": 11830 + }, + { + "epoch": 13.27, + "learning_rate": 2.3810929169840062e-05, + "loss": 0.0394, + "step": 11840 + }, + { + "epoch": 13.28, + "learning_rate": 2.3805217060167556e-05, + "loss": 0.0513, + "step": 11850 + }, + { + "epoch": 13.3, + "learning_rate": 2.3799504950495053e-05, + "loss": 0.0389, + "step": 11860 + }, + { + "epoch": 13.31, + "learning_rate": 2.3793792840822546e-05, + "loss": 0.0358, + "step": 11870 + }, + { + "epoch": 13.32, + "learning_rate": 2.378808073115004e-05, + "loss": 0.0369, + "step": 11880 + }, + { + "epoch": 13.33, + "learning_rate": 2.378236862147753e-05, + "loss": 0.0374, + "step": 11890 + }, + { + "epoch": 13.34, + "learning_rate": 2.3776656511805027e-05, + "loss": 0.0545, + "step": 11900 + }, + { + "epoch": 13.35, + "learning_rate": 2.377094440213252e-05, + "loss": 0.0437, + "step": 11910 + }, + { + "epoch": 13.36, + "learning_rate": 2.3765232292460014e-05, + "loss": 0.038, + "step": 11920 + }, + { + "epoch": 13.37, + "learning_rate": 2.375952018278751e-05, + "loss": 0.0392, + "step": 11930 + }, + { + "epoch": 13.39, + "learning_rate": 2.3753808073115004e-05, + "loss": 0.0411, + "step": 11940 + }, + { + "epoch": 13.4, + "learning_rate": 2.3748095963442498e-05, + "loss": 0.0546, + "step": 11950 + }, + { + "epoch": 13.41, + "learning_rate": 2.3742383853769995e-05, + "loss": 0.0397, + "step": 11960 + }, + { + "epoch": 13.42, + "learning_rate": 2.373667174409749e-05, + "loss": 0.0368, + "step": 11970 + }, + { + "epoch": 13.43, + "learning_rate": 2.3730959634424982e-05, + "loss": 0.0369, + "step": 11980 + }, + { + "epoch": 13.44, + "learning_rate": 2.3725247524752476e-05, + "loss": 0.035, + "step": 11990 + }, + { + "epoch": 13.45, + "learning_rate": 2.3719535415079973e-05, + "loss": 0.0594, + "step": 12000 + }, + { + "epoch": 13.46, + "learning_rate": 2.3713823305407466e-05, + "loss": 0.04, + "step": 12010 + }, + { + "epoch": 13.48, + "learning_rate": 2.3708111195734956e-05, + "loss": 0.0365, + "step": 12020 + }, + { + "epoch": 13.49, + "learning_rate": 2.3702399086062453e-05, + "loss": 0.0381, + "step": 12030 + }, + { + "epoch": 13.5, + "learning_rate": 2.3696686976389947e-05, + "loss": 0.039, + "step": 12040 + }, + { + "epoch": 13.51, + "learning_rate": 2.369097486671744e-05, + "loss": 0.0507, + "step": 12050 + }, + { + "epoch": 13.52, + "learning_rate": 2.3685262757044934e-05, + "loss": 0.0387, + "step": 12060 + }, + { + "epoch": 13.53, + "learning_rate": 2.367955064737243e-05, + "loss": 0.0426, + "step": 12070 + }, + { + "epoch": 13.54, + "learning_rate": 2.3673838537699924e-05, + "loss": 0.0375, + "step": 12080 + }, + { + "epoch": 13.55, + "learning_rate": 2.3668126428027418e-05, + "loss": 0.0364, + "step": 12090 + }, + { + "epoch": 13.57, + "learning_rate": 2.3662414318354915e-05, + "loss": 0.0512, + "step": 12100 + }, + { + "epoch": 13.58, + "learning_rate": 2.3656702208682408e-05, + "loss": 0.0434, + "step": 12110 + }, + { + "epoch": 13.59, + "learning_rate": 2.3650990099009902e-05, + "loss": 0.04, + "step": 12120 + }, + { + "epoch": 13.6, + "learning_rate": 2.36452779893374e-05, + "loss": 0.0373, + "step": 12130 + }, + { + "epoch": 13.61, + "learning_rate": 2.3639565879664892e-05, + "loss": 0.0372, + "step": 12140 + }, + { + "epoch": 13.62, + "learning_rate": 2.3633853769992382e-05, + "loss": 0.0499, + "step": 12150 + }, + { + "epoch": 13.63, + "learning_rate": 2.3628141660319876e-05, + "loss": 0.0393, + "step": 12160 + }, + { + "epoch": 13.64, + "learning_rate": 2.3622429550647373e-05, + "loss": 0.0382, + "step": 12170 + }, + { + "epoch": 13.65, + "learning_rate": 2.3616717440974866e-05, + "loss": 0.0393, + "step": 12180 + }, + { + "epoch": 13.67, + "learning_rate": 2.361100533130236e-05, + "loss": 0.0385, + "step": 12190 + }, + { + "epoch": 13.68, + "learning_rate": 2.3605293221629857e-05, + "loss": 0.0514, + "step": 12200 + }, + { + "epoch": 13.69, + "learning_rate": 2.359958111195735e-05, + "loss": 0.0412, + "step": 12210 + }, + { + "epoch": 13.7, + "learning_rate": 2.3593869002284844e-05, + "loss": 0.0384, + "step": 12220 + }, + { + "epoch": 13.71, + "learning_rate": 2.3588156892612338e-05, + "loss": 0.0386, + "step": 12230 + }, + { + "epoch": 13.72, + "learning_rate": 2.3582444782939834e-05, + "loss": 0.0421, + "step": 12240 + }, + { + "epoch": 13.73, + "learning_rate": 2.3576732673267328e-05, + "loss": 0.0482, + "step": 12250 + }, + { + "epoch": 13.74, + "learning_rate": 2.357102056359482e-05, + "loss": 0.0392, + "step": 12260 + }, + { + "epoch": 13.76, + "learning_rate": 2.356530845392232e-05, + "loss": 0.0433, + "step": 12270 + }, + { + "epoch": 13.77, + "learning_rate": 2.355959634424981e-05, + "loss": 0.0388, + "step": 12280 + }, + { + "epoch": 13.78, + "learning_rate": 2.3553884234577302e-05, + "loss": 0.0404, + "step": 12290 + }, + { + "epoch": 13.79, + "learning_rate": 2.35481721249048e-05, + "loss": 0.0622, + "step": 12300 + }, + { + "epoch": 13.8, + "learning_rate": 2.3542460015232293e-05, + "loss": 0.041, + "step": 12310 + }, + { + "epoch": 13.81, + "learning_rate": 2.3536747905559786e-05, + "loss": 0.0384, + "step": 12320 + }, + { + "epoch": 13.82, + "learning_rate": 2.353103579588728e-05, + "loss": 0.0419, + "step": 12330 + }, + { + "epoch": 13.83, + "learning_rate": 2.3525323686214777e-05, + "loss": 0.0377, + "step": 12340 + }, + { + "epoch": 13.85, + "learning_rate": 2.351961157654227e-05, + "loss": 0.0549, + "step": 12350 + }, + { + "epoch": 13.86, + "learning_rate": 2.3513899466869764e-05, + "loss": 0.0381, + "step": 12360 + }, + { + "epoch": 13.87, + "learning_rate": 2.350818735719726e-05, + "loss": 0.0403, + "step": 12370 + }, + { + "epoch": 13.88, + "learning_rate": 2.3502475247524754e-05, + "loss": 0.0406, + "step": 12380 + }, + { + "epoch": 13.89, + "learning_rate": 2.3496763137852248e-05, + "loss": 0.0393, + "step": 12390 + }, + { + "epoch": 13.9, + "learning_rate": 2.3491051028179745e-05, + "loss": 0.0466, + "step": 12400 + }, + { + "epoch": 13.91, + "learning_rate": 2.3485338918507238e-05, + "loss": 0.0379, + "step": 12410 + }, + { + "epoch": 13.92, + "learning_rate": 2.347962680883473e-05, + "loss": 0.0413, + "step": 12420 + }, + { + "epoch": 13.93, + "learning_rate": 2.3473914699162222e-05, + "loss": 0.0413, + "step": 12430 + }, + { + "epoch": 13.95, + "learning_rate": 2.346820258948972e-05, + "loss": 0.0375, + "step": 12440 + }, + { + "epoch": 13.96, + "learning_rate": 2.3462490479817212e-05, + "loss": 0.0562, + "step": 12450 + }, + { + "epoch": 13.97, + "learning_rate": 2.3456778370144706e-05, + "loss": 0.0384, + "step": 12460 + }, + { + "epoch": 13.98, + "learning_rate": 2.3451066260472203e-05, + "loss": 0.0376, + "step": 12470 + }, + { + "epoch": 13.99, + "learning_rate": 2.3445354150799696e-05, + "loss": 0.0374, + "step": 12480 + }, + { + "epoch": 14.0, + "eval_cer": 1.1540294301641547, + "eval_loss": 0.08981550484895706, + "eval_runtime": 479.6018, + "eval_samples_per_second": 11.099, + "eval_steps_per_second": 1.389, + "eval_wer": 3.153274212503714, + "step": 12488 + }, + { + "epoch": 14.0, + "learning_rate": 2.343964204112719e-05, + "loss": 0.0504, + "step": 12490 + }, + { + "epoch": 14.01, + "learning_rate": 2.3433929931454684e-05, + "loss": 0.0326, + "step": 12500 + }, + { + "epoch": 14.02, + "learning_rate": 2.342821782178218e-05, + "loss": 0.031, + "step": 12510 + }, + { + "epoch": 14.04, + "learning_rate": 2.3422505712109674e-05, + "loss": 0.0352, + "step": 12520 + }, + { + "epoch": 14.05, + "learning_rate": 2.3416793602437168e-05, + "loss": 0.0335, + "step": 12530 + }, + { + "epoch": 14.06, + "learning_rate": 2.3411081492764664e-05, + "loss": 0.0537, + "step": 12540 + }, + { + "epoch": 14.07, + "learning_rate": 2.3405369383092155e-05, + "loss": 0.0364, + "step": 12550 + }, + { + "epoch": 14.08, + "learning_rate": 2.3399657273419648e-05, + "loss": 0.0368, + "step": 12560 + }, + { + "epoch": 14.09, + "learning_rate": 2.3393945163747145e-05, + "loss": 0.0358, + "step": 12570 + }, + { + "epoch": 14.1, + "learning_rate": 2.338823305407464e-05, + "loss": 0.0361, + "step": 12580 + }, + { + "epoch": 14.11, + "learning_rate": 2.3382520944402132e-05, + "loss": 0.0412, + "step": 12590 + }, + { + "epoch": 14.13, + "learning_rate": 2.3376808834729626e-05, + "loss": 0.0415, + "step": 12600 + }, + { + "epoch": 14.14, + "learning_rate": 2.3371096725057123e-05, + "loss": 0.0372, + "step": 12610 + }, + { + "epoch": 14.15, + "learning_rate": 2.3365384615384616e-05, + "loss": 0.0311, + "step": 12620 + }, + { + "epoch": 14.16, + "learning_rate": 2.335967250571211e-05, + "loss": 0.0335, + "step": 12630 + }, + { + "epoch": 14.17, + "learning_rate": 2.3353960396039607e-05, + "loss": 0.0495, + "step": 12640 + }, + { + "epoch": 14.18, + "learning_rate": 2.33482482863671e-05, + "loss": 0.0414, + "step": 12650 + }, + { + "epoch": 14.19, + "learning_rate": 2.3342536176694594e-05, + "loss": 0.0333, + "step": 12660 + }, + { + "epoch": 14.2, + "learning_rate": 2.3336824067022087e-05, + "loss": 0.0346, + "step": 12670 + }, + { + "epoch": 14.22, + "learning_rate": 2.333111195734958e-05, + "loss": 0.0337, + "step": 12680 + }, + { + "epoch": 14.23, + "learning_rate": 2.3325399847677074e-05, + "loss": 0.0504, + "step": 12690 + }, + { + "epoch": 14.24, + "learning_rate": 2.3319687738004568e-05, + "loss": 0.039, + "step": 12700 + }, + { + "epoch": 14.25, + "learning_rate": 2.3313975628332065e-05, + "loss": 0.0361, + "step": 12710 + }, + { + "epoch": 14.26, + "learning_rate": 2.330826351865956e-05, + "loss": 0.0354, + "step": 12720 + }, + { + "epoch": 14.27, + "learning_rate": 2.3302551408987052e-05, + "loss": 0.0343, + "step": 12730 + }, + { + "epoch": 14.28, + "learning_rate": 2.329683929931455e-05, + "loss": 0.0484, + "step": 12740 + }, + { + "epoch": 14.29, + "learning_rate": 2.3291127189642042e-05, + "loss": 0.0414, + "step": 12750 + }, + { + "epoch": 14.3, + "learning_rate": 2.3285415079969536e-05, + "loss": 0.0387, + "step": 12760 + }, + { + "epoch": 14.32, + "learning_rate": 2.327970297029703e-05, + "loss": 0.0375, + "step": 12770 + }, + { + "epoch": 14.33, + "learning_rate": 2.3273990860624526e-05, + "loss": 0.0342, + "step": 12780 + }, + { + "epoch": 14.34, + "learning_rate": 2.326827875095202e-05, + "loss": 0.0474, + "step": 12790 + }, + { + "epoch": 14.35, + "learning_rate": 2.3262566641279514e-05, + "loss": 0.0366, + "step": 12800 + }, + { + "epoch": 14.36, + "learning_rate": 2.3256854531607007e-05, + "loss": 0.0374, + "step": 12810 + }, + { + "epoch": 14.37, + "learning_rate": 2.32511424219345e-05, + "loss": 0.0367, + "step": 12820 + }, + { + "epoch": 14.38, + "learning_rate": 2.3245430312261994e-05, + "loss": 0.0363, + "step": 12830 + }, + { + "epoch": 14.39, + "learning_rate": 2.323971820258949e-05, + "loss": 0.0489, + "step": 12840 + }, + { + "epoch": 14.41, + "learning_rate": 2.3234006092916985e-05, + "loss": 0.034, + "step": 12850 + }, + { + "epoch": 14.42, + "learning_rate": 2.3228293983244478e-05, + "loss": 0.0364, + "step": 12860 + }, + { + "epoch": 14.43, + "learning_rate": 2.3222581873571972e-05, + "loss": 0.0365, + "step": 12870 + }, + { + "epoch": 14.44, + "learning_rate": 2.321686976389947e-05, + "loss": 0.0354, + "step": 12880 + }, + { + "epoch": 14.45, + "learning_rate": 2.3211157654226962e-05, + "loss": 0.0455, + "step": 12890 + }, + { + "epoch": 14.46, + "learning_rate": 2.3205445544554456e-05, + "loss": 0.0367, + "step": 12900 + }, + { + "epoch": 14.47, + "learning_rate": 2.3199733434881953e-05, + "loss": 0.0327, + "step": 12910 + }, + { + "epoch": 14.48, + "learning_rate": 2.3194021325209446e-05, + "loss": 0.0364, + "step": 12920 + }, + { + "epoch": 14.5, + "learning_rate": 2.318830921553694e-05, + "loss": 0.0339, + "step": 12930 + }, + { + "epoch": 14.51, + "learning_rate": 2.3182597105864433e-05, + "loss": 0.0451, + "step": 12940 + }, + { + "epoch": 14.52, + "learning_rate": 2.3176884996191927e-05, + "loss": 0.0368, + "step": 12950 + }, + { + "epoch": 14.53, + "learning_rate": 2.317117288651942e-05, + "loss": 0.0385, + "step": 12960 + }, + { + "epoch": 14.54, + "learning_rate": 2.3165460776846914e-05, + "loss": 0.034, + "step": 12970 + }, + { + "epoch": 14.55, + "learning_rate": 2.315974866717441e-05, + "loss": 0.0325, + "step": 12980 + }, + { + "epoch": 14.56, + "learning_rate": 2.3154036557501904e-05, + "loss": 0.0464, + "step": 12990 + }, + { + "epoch": 14.57, + "learning_rate": 2.3148324447829398e-05, + "loss": 0.035, + "step": 13000 + }, + { + "epoch": 14.59, + "learning_rate": 2.3142612338156895e-05, + "loss": 0.0354, + "step": 13010 + }, + { + "epoch": 14.6, + "learning_rate": 2.313690022848439e-05, + "loss": 0.0339, + "step": 13020 + }, + { + "epoch": 14.61, + "learning_rate": 2.3131188118811882e-05, + "loss": 0.0331, + "step": 13030 + }, + { + "epoch": 14.62, + "learning_rate": 2.3125476009139375e-05, + "loss": 0.0462, + "step": 13040 + }, + { + "epoch": 14.63, + "learning_rate": 2.3119763899466872e-05, + "loss": 0.0385, + "step": 13050 + }, + { + "epoch": 14.64, + "learning_rate": 2.3114051789794366e-05, + "loss": 0.0368, + "step": 13060 + }, + { + "epoch": 14.65, + "learning_rate": 2.310833968012186e-05, + "loss": 0.0391, + "step": 13070 + }, + { + "epoch": 14.66, + "learning_rate": 2.3102627570449353e-05, + "loss": 0.0373, + "step": 13080 + }, + { + "epoch": 14.67, + "learning_rate": 2.3096915460776847e-05, + "loss": 0.0483, + "step": 13090 + }, + { + "epoch": 14.69, + "learning_rate": 2.309120335110434e-05, + "loss": 0.0389, + "step": 13100 + }, + { + "epoch": 14.7, + "learning_rate": 2.3085491241431834e-05, + "loss": 0.0357, + "step": 13110 + }, + { + "epoch": 14.71, + "learning_rate": 2.307977913175933e-05, + "loss": 0.0351, + "step": 13120 + }, + { + "epoch": 14.72, + "learning_rate": 2.3074067022086824e-05, + "loss": 0.0378, + "step": 13130 + }, + { + "epoch": 14.73, + "learning_rate": 2.3068354912414318e-05, + "loss": 0.0427, + "step": 13140 + }, + { + "epoch": 14.74, + "learning_rate": 2.3062642802741815e-05, + "loss": 0.0394, + "step": 13150 + }, + { + "epoch": 14.75, + "learning_rate": 2.3056930693069308e-05, + "loss": 0.037, + "step": 13160 + }, + { + "epoch": 14.76, + "learning_rate": 2.3051218583396802e-05, + "loss": 0.0338, + "step": 13170 + }, + { + "epoch": 14.78, + "learning_rate": 2.30455064737243e-05, + "loss": 0.038, + "step": 13180 + }, + { + "epoch": 14.79, + "learning_rate": 2.3039794364051792e-05, + "loss": 0.0429, + "step": 13190 + }, + { + "epoch": 14.8, + "learning_rate": 2.3034082254379286e-05, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 14.81, + "learning_rate": 2.3028370144706776e-05, + "loss": 0.0326, + "step": 13210 + }, + { + "epoch": 14.82, + "learning_rate": 2.3022658035034273e-05, + "loss": 0.0352, + "step": 13220 + }, + { + "epoch": 14.83, + "learning_rate": 2.3016945925361766e-05, + "loss": 0.0334, + "step": 13230 + }, + { + "epoch": 14.84, + "learning_rate": 2.301123381568926e-05, + "loss": 0.0508, + "step": 13240 + }, + { + "epoch": 14.85, + "learning_rate": 2.3005521706016757e-05, + "loss": 0.0393, + "step": 13250 + }, + { + "epoch": 14.87, + "learning_rate": 2.299980959634425e-05, + "loss": 0.035, + "step": 13260 + }, + { + "epoch": 14.88, + "learning_rate": 2.2994097486671744e-05, + "loss": 0.0323, + "step": 13270 + }, + { + "epoch": 14.89, + "learning_rate": 2.2988385376999237e-05, + "loss": 0.0332, + "step": 13280 + }, + { + "epoch": 14.9, + "learning_rate": 2.2982673267326734e-05, + "loss": 0.0505, + "step": 13290 + }, + { + "epoch": 14.91, + "learning_rate": 2.2976961157654228e-05, + "loss": 0.0376, + "step": 13300 + }, + { + "epoch": 14.92, + "learning_rate": 2.297124904798172e-05, + "loss": 0.0423, + "step": 13310 + }, + { + "epoch": 14.93, + "learning_rate": 2.296553693830922e-05, + "loss": 0.0347, + "step": 13320 + }, + { + "epoch": 14.94, + "learning_rate": 2.2959824828636712e-05, + "loss": 0.0335, + "step": 13330 + }, + { + "epoch": 14.96, + "learning_rate": 2.2954112718964202e-05, + "loss": 0.05, + "step": 13340 + }, + { + "epoch": 14.97, + "learning_rate": 2.29484006092917e-05, + "loss": 0.0392, + "step": 13350 + }, + { + "epoch": 14.98, + "learning_rate": 2.2942688499619193e-05, + "loss": 0.0395, + "step": 13360 + }, + { + "epoch": 14.99, + "learning_rate": 2.2936976389946686e-05, + "loss": 0.0352, + "step": 13370 + }, + { + "epoch": 15.0, + "learning_rate": 2.293126428027418e-05, + "loss": 0.0554, + "step": 13380 + }, + { + "epoch": 15.0, + "eval_cer": 1.1545259968925132, + "eval_loss": 0.0888233408331871, + "eval_runtime": 479.4568, + "eval_samples_per_second": 11.102, + "eval_steps_per_second": 1.389, + "eval_wer": 3.1510157722735053, + "step": 13380 + }, + { + "epoch": 15.01, + "learning_rate": 2.2925552170601677e-05, + "loss": 0.0381, + "step": 13390 + }, + { + "epoch": 15.02, + "learning_rate": 2.291984006092917e-05, + "loss": 0.0309, + "step": 13400 + }, + { + "epoch": 15.03, + "learning_rate": 2.2914127951256664e-05, + "loss": 0.032, + "step": 13410 + }, + { + "epoch": 15.04, + "learning_rate": 2.290841584158416e-05, + "loss": 0.0325, + "step": 13420 + }, + { + "epoch": 15.06, + "learning_rate": 2.2902703731911654e-05, + "loss": 0.0456, + "step": 13430 + }, + { + "epoch": 15.07, + "learning_rate": 2.2896991622239148e-05, + "loss": 0.0364, + "step": 13440 + }, + { + "epoch": 15.08, + "learning_rate": 2.2891279512566645e-05, + "loss": 0.0296, + "step": 13450 + }, + { + "epoch": 15.09, + "learning_rate": 2.2885567402894138e-05, + "loss": 0.0328, + "step": 13460 + }, + { + "epoch": 15.1, + "learning_rate": 2.2879855293221632e-05, + "loss": 0.0323, + "step": 13470 + }, + { + "epoch": 15.11, + "learning_rate": 2.2874143183549122e-05, + "loss": 0.0404, + "step": 13480 + }, + { + "epoch": 15.12, + "learning_rate": 2.286843107387662e-05, + "loss": 0.0378, + "step": 13490 + }, + { + "epoch": 15.13, + "learning_rate": 2.2862718964204112e-05, + "loss": 0.0351, + "step": 13500 + }, + { + "epoch": 15.15, + "learning_rate": 2.2857006854531606e-05, + "loss": 0.0333, + "step": 13510 + }, + { + "epoch": 15.16, + "learning_rate": 2.2851294744859103e-05, + "loss": 0.0316, + "step": 13520 + }, + { + "epoch": 15.17, + "learning_rate": 2.2845582635186596e-05, + "loss": 0.0468, + "step": 13530 + }, + { + "epoch": 15.18, + "learning_rate": 2.283987052551409e-05, + "loss": 0.03, + "step": 13540 + }, + { + "epoch": 15.19, + "learning_rate": 2.2834158415841583e-05, + "loss": 0.0357, + "step": 13550 + }, + { + "epoch": 15.2, + "learning_rate": 2.282844630616908e-05, + "loss": 0.0331, + "step": 13560 + }, + { + "epoch": 15.21, + "learning_rate": 2.2822734196496574e-05, + "loss": 0.0295, + "step": 13570 + }, + { + "epoch": 15.22, + "learning_rate": 2.2817022086824067e-05, + "loss": 0.0448, + "step": 13580 + }, + { + "epoch": 15.24, + "learning_rate": 2.2811309977151564e-05, + "loss": 0.0336, + "step": 13590 + }, + { + "epoch": 15.25, + "learning_rate": 2.2805597867479058e-05, + "loss": 0.0314, + "step": 13600 + }, + { + "epoch": 15.26, + "learning_rate": 2.2799885757806548e-05, + "loss": 0.0319, + "step": 13610 + }, + { + "epoch": 15.27, + "learning_rate": 2.2794173648134045e-05, + "loss": 0.0266, + "step": 13620 + }, + { + "epoch": 15.28, + "learning_rate": 2.278846153846154e-05, + "loss": 0.0455, + "step": 13630 + }, + { + "epoch": 15.29, + "learning_rate": 2.2782749428789032e-05, + "loss": 0.0359, + "step": 13640 + }, + { + "epoch": 15.3, + "learning_rate": 2.2777037319116526e-05, + "loss": 0.0333, + "step": 13650 + }, + { + "epoch": 15.31, + "learning_rate": 2.2771325209444023e-05, + "loss": 0.0301, + "step": 13660 + }, + { + "epoch": 15.33, + "learning_rate": 2.2765613099771516e-05, + "loss": 0.0349, + "step": 13670 + }, + { + "epoch": 15.34, + "learning_rate": 2.275990099009901e-05, + "loss": 0.0403, + "step": 13680 + }, + { + "epoch": 15.35, + "learning_rate": 2.2754188880426507e-05, + "loss": 0.0333, + "step": 13690 + }, + { + "epoch": 15.36, + "learning_rate": 2.2748476770754e-05, + "loss": 0.0357, + "step": 13700 + }, + { + "epoch": 15.37, + "learning_rate": 2.2742764661081494e-05, + "loss": 0.0332, + "step": 13710 + }, + { + "epoch": 15.38, + "learning_rate": 2.2737052551408987e-05, + "loss": 0.03, + "step": 13720 + }, + { + "epoch": 15.39, + "learning_rate": 2.2731340441736484e-05, + "loss": 0.0334, + "step": 13730 + }, + { + "epoch": 15.4, + "learning_rate": 2.2725628332063974e-05, + "loss": 0.0353, + "step": 13740 + }, + { + "epoch": 15.41, + "learning_rate": 2.2719916222391468e-05, + "loss": 0.0336, + "step": 13750 + }, + { + "epoch": 15.43, + "learning_rate": 2.2714204112718965e-05, + "loss": 0.0365, + "step": 13760 + }, + { + "epoch": 15.44, + "learning_rate": 2.270849200304646e-05, + "loss": 0.0335, + "step": 13770 + }, + { + "epoch": 15.45, + "learning_rate": 2.2702779893373952e-05, + "loss": 0.0382, + "step": 13780 + }, + { + "epoch": 15.46, + "learning_rate": 2.269706778370145e-05, + "loss": 0.0357, + "step": 13790 + }, + { + "epoch": 15.47, + "learning_rate": 2.2691355674028942e-05, + "loss": 0.0337, + "step": 13800 + }, + { + "epoch": 15.48, + "learning_rate": 2.2685643564356436e-05, + "loss": 0.0309, + "step": 13810 + }, + { + "epoch": 15.49, + "learning_rate": 2.267993145468393e-05, + "loss": 0.0325, + "step": 13820 + }, + { + "epoch": 15.5, + "learning_rate": 2.2674219345011426e-05, + "loss": 0.0519, + "step": 13830 + }, + { + "epoch": 15.52, + "learning_rate": 2.266850723533892e-05, + "loss": 0.0352, + "step": 13840 + }, + { + "epoch": 15.53, + "learning_rate": 2.2662795125666413e-05, + "loss": 0.0338, + "step": 13850 + }, + { + "epoch": 15.54, + "learning_rate": 2.265708301599391e-05, + "loss": 0.0332, + "step": 13860 + }, + { + "epoch": 15.55, + "learning_rate": 2.26513709063214e-05, + "loss": 0.0314, + "step": 13870 + }, + { + "epoch": 15.56, + "learning_rate": 2.2645658796648894e-05, + "loss": 0.048, + "step": 13880 + }, + { + "epoch": 15.57, + "learning_rate": 2.263994668697639e-05, + "loss": 0.0364, + "step": 13890 + }, + { + "epoch": 15.58, + "learning_rate": 2.2634234577303885e-05, + "loss": 0.0331, + "step": 13900 + }, + { + "epoch": 15.59, + "learning_rate": 2.262909367859863e-05, + "loss": 0.0312, + "step": 13910 + }, + { + "epoch": 15.61, + "learning_rate": 2.2623381568926124e-05, + "loss": 0.0313, + "step": 13920 + }, + { + "epoch": 15.62, + "learning_rate": 2.2617669459253617e-05, + "loss": 0.0472, + "step": 13930 + }, + { + "epoch": 15.63, + "learning_rate": 2.261195734958111e-05, + "loss": 0.0343, + "step": 13940 + }, + { + "epoch": 15.64, + "learning_rate": 2.2606245239908605e-05, + "loss": 0.0326, + "step": 13950 + }, + { + "epoch": 15.65, + "learning_rate": 2.26005331302361e-05, + "loss": 0.033, + "step": 13960 + }, + { + "epoch": 15.66, + "learning_rate": 2.2594821020563595e-05, + "loss": 0.0336, + "step": 13970 + }, + { + "epoch": 15.67, + "learning_rate": 2.258910891089109e-05, + "loss": 0.0419, + "step": 13980 + }, + { + "epoch": 15.68, + "learning_rate": 2.2583396801218586e-05, + "loss": 0.0372, + "step": 13990 + }, + { + "epoch": 15.7, + "learning_rate": 2.257768469154608e-05, + "loss": 0.0349, + "step": 14000 + }, + { + "epoch": 15.71, + "learning_rate": 2.2571972581873573e-05, + "loss": 0.0356, + "step": 14010 + }, + { + "epoch": 15.72, + "learning_rate": 2.256626047220107e-05, + "loss": 0.0299, + "step": 14020 + }, + { + "epoch": 15.73, + "learning_rate": 2.2560548362528563e-05, + "loss": 0.0349, + "step": 14030 + }, + { + "epoch": 15.74, + "learning_rate": 2.2554836252856057e-05, + "loss": 0.0356, + "step": 14040 + }, + { + "epoch": 15.75, + "learning_rate": 2.2549124143183547e-05, + "loss": 0.0339, + "step": 14050 + }, + { + "epoch": 15.76, + "learning_rate": 2.2543412033511044e-05, + "loss": 0.0362, + "step": 14060 + }, + { + "epoch": 15.77, + "learning_rate": 2.2537699923838537e-05, + "loss": 0.0328, + "step": 14070 + }, + { + "epoch": 15.78, + "learning_rate": 2.253198781416603e-05, + "loss": 0.0407, + "step": 14080 + }, + { + "epoch": 15.8, + "learning_rate": 2.2526275704493528e-05, + "loss": 0.0366, + "step": 14090 + }, + { + "epoch": 15.81, + "learning_rate": 2.252056359482102e-05, + "loss": 0.0354, + "step": 14100 + }, + { + "epoch": 15.82, + "learning_rate": 2.2514851485148515e-05, + "loss": 0.0325, + "step": 14110 + }, + { + "epoch": 15.83, + "learning_rate": 2.250913937547601e-05, + "loss": 0.0338, + "step": 14120 + }, + { + "epoch": 15.84, + "learning_rate": 2.2503427265803505e-05, + "loss": 0.0394, + "step": 14130 + }, + { + "epoch": 15.85, + "learning_rate": 2.2497715156131e-05, + "loss": 0.0371, + "step": 14140 + }, + { + "epoch": 15.86, + "learning_rate": 2.2492003046458492e-05, + "loss": 0.0298, + "step": 14150 + }, + { + "epoch": 15.87, + "learning_rate": 2.248629093678599e-05, + "loss": 0.0341, + "step": 14160 + }, + { + "epoch": 15.89, + "learning_rate": 2.2480578827113483e-05, + "loss": 0.0283, + "step": 14170 + }, + { + "epoch": 15.9, + "learning_rate": 2.2474866717440973e-05, + "loss": 0.0467, + "step": 14180 + }, + { + "epoch": 15.91, + "learning_rate": 2.246915460776847e-05, + "loss": 0.0329, + "step": 14190 + }, + { + "epoch": 15.92, + "learning_rate": 2.2463442498095963e-05, + "loss": 0.0354, + "step": 14200 + }, + { + "epoch": 15.93, + "learning_rate": 2.2457730388423457e-05, + "loss": 0.0322, + "step": 14210 + }, + { + "epoch": 15.94, + "learning_rate": 2.245201827875095e-05, + "loss": 0.0324, + "step": 14220 + }, + { + "epoch": 15.95, + "learning_rate": 2.2446306169078447e-05, + "loss": 0.0404, + "step": 14230 + }, + { + "epoch": 15.96, + "learning_rate": 2.244059405940594e-05, + "loss": 0.0328, + "step": 14240 + }, + { + "epoch": 15.98, + "learning_rate": 2.2434881949733435e-05, + "loss": 0.0371, + "step": 14250 + }, + { + "epoch": 15.99, + "learning_rate": 2.242916984006093e-05, + "loss": 0.0335, + "step": 14260 + }, + { + "epoch": 16.0, + "learning_rate": 2.2423457730388425e-05, + "loss": 0.0341, + "step": 14270 + }, + { + "epoch": 16.0, + "eval_cer": 1.1528311728813307, + "eval_loss": 0.09186279773712158, + "eval_runtime": 496.1516, + "eval_samples_per_second": 10.729, + "eval_steps_per_second": 1.342, + "eval_wer": 3.1496039184858198, + "step": 14272 + }, + { + "epoch": 16.01, + "learning_rate": 2.241774562071592e-05, + "loss": 0.0444, + "step": 14280 + }, + { + "epoch": 16.02, + "learning_rate": 2.2412033511043412e-05, + "loss": 0.0339, + "step": 14290 + }, + { + "epoch": 16.03, + "learning_rate": 2.240632140137091e-05, + "loss": 0.0318, + "step": 14300 + }, + { + "epoch": 16.04, + "learning_rate": 2.24006092916984e-05, + "loss": 0.0329, + "step": 14310 + }, + { + "epoch": 16.05, + "learning_rate": 2.2394897182025893e-05, + "loss": 0.036, + "step": 14320 + }, + { + "epoch": 16.07, + "learning_rate": 2.238918507235339e-05, + "loss": 0.038, + "step": 14330 + }, + { + "epoch": 16.08, + "learning_rate": 2.2383472962680883e-05, + "loss": 0.0284, + "step": 14340 + }, + { + "epoch": 16.09, + "learning_rate": 2.2377760853008377e-05, + "loss": 0.0298, + "step": 14350 + }, + { + "epoch": 16.1, + "learning_rate": 2.2372048743335874e-05, + "loss": 0.0279, + "step": 14360 + }, + { + "epoch": 16.11, + "learning_rate": 2.2366336633663367e-05, + "loss": 0.0322, + "step": 14370 + }, + { + "epoch": 16.12, + "learning_rate": 2.236062452399086e-05, + "loss": 0.0353, + "step": 14380 + }, + { + "epoch": 16.13, + "learning_rate": 2.2354912414318354e-05, + "loss": 0.0315, + "step": 14390 + }, + { + "epoch": 16.14, + "learning_rate": 2.234920030464585e-05, + "loss": 0.0321, + "step": 14400 + }, + { + "epoch": 16.15, + "learning_rate": 2.2343488194973345e-05, + "loss": 0.0304, + "step": 14410 + }, + { + "epoch": 16.17, + "learning_rate": 2.233777608530084e-05, + "loss": 0.033, + "step": 14420 + }, + { + "epoch": 16.18, + "learning_rate": 2.2332063975628335e-05, + "loss": 0.0365, + "step": 14430 + }, + { + "epoch": 16.19, + "learning_rate": 2.232635186595583e-05, + "loss": 0.0334, + "step": 14440 + }, + { + "epoch": 16.2, + "learning_rate": 2.232063975628332e-05, + "loss": 0.0317, + "step": 14450 + }, + { + "epoch": 16.21, + "learning_rate": 2.2314927646610816e-05, + "loss": 0.0292, + "step": 14460 + }, + { + "epoch": 16.22, + "learning_rate": 2.230921553693831e-05, + "loss": 0.0327, + "step": 14470 + }, + { + "epoch": 16.23, + "learning_rate": 2.2303503427265803e-05, + "loss": 0.041, + "step": 14480 + }, + { + "epoch": 16.24, + "learning_rate": 2.2297791317593297e-05, + "loss": 0.0314, + "step": 14490 + }, + { + "epoch": 16.26, + "learning_rate": 2.2292079207920793e-05, + "loss": 0.028, + "step": 14500 + }, + { + "epoch": 16.27, + "learning_rate": 2.2286367098248287e-05, + "loss": 0.0285, + "step": 14510 + }, + { + "epoch": 16.28, + "learning_rate": 2.228065498857578e-05, + "loss": 0.0289, + "step": 14520 + }, + { + "epoch": 16.29, + "learning_rate": 2.2274942878903277e-05, + "loss": 0.0389, + "step": 14530 + }, + { + "epoch": 16.3, + "learning_rate": 2.226923076923077e-05, + "loss": 0.0295, + "step": 14540 + }, + { + "epoch": 16.31, + "learning_rate": 2.2263518659558265e-05, + "loss": 0.0289, + "step": 14550 + }, + { + "epoch": 16.32, + "learning_rate": 2.2257806549885758e-05, + "loss": 0.0274, + "step": 14560 + }, + { + "epoch": 16.33, + "learning_rate": 2.2252094440213255e-05, + "loss": 0.0304, + "step": 14570 + }, + { + "epoch": 16.35, + "learning_rate": 2.2246382330540745e-05, + "loss": 0.037, + "step": 14580 + }, + { + "epoch": 16.36, + "learning_rate": 2.224067022086824e-05, + "loss": 0.0281, + "step": 14590 + }, + { + "epoch": 16.37, + "learning_rate": 2.2234958111195736e-05, + "loss": 0.0306, + "step": 14600 + }, + { + "epoch": 16.38, + "learning_rate": 2.222924600152323e-05, + "loss": 0.0284, + "step": 14610 + }, + { + "epoch": 16.39, + "learning_rate": 2.2223533891850723e-05, + "loss": 0.0302, + "step": 14620 + }, + { + "epoch": 16.4, + "learning_rate": 2.221782178217822e-05, + "loss": 0.0374, + "step": 14630 + }, + { + "epoch": 16.41, + "learning_rate": 2.2212109672505713e-05, + "loss": 0.0295, + "step": 14640 + }, + { + "epoch": 16.42, + "learning_rate": 2.2206397562833207e-05, + "loss": 0.0287, + "step": 14650 + }, + { + "epoch": 16.43, + "learning_rate": 2.22006854531607e-05, + "loss": 0.028, + "step": 14660 + }, + { + "epoch": 16.45, + "learning_rate": 2.2194973343488197e-05, + "loss": 0.0317, + "step": 14670 + }, + { + "epoch": 16.46, + "learning_rate": 2.218926123381569e-05, + "loss": 0.0391, + "step": 14680 + }, + { + "epoch": 16.47, + "learning_rate": 2.2183549124143184e-05, + "loss": 0.0325, + "step": 14690 + }, + { + "epoch": 16.48, + "learning_rate": 2.217783701447068e-05, + "loss": 0.0302, + "step": 14700 + }, + { + "epoch": 16.49, + "learning_rate": 2.217212490479817e-05, + "loss": 0.0327, + "step": 14710 + }, + { + "epoch": 16.5, + "learning_rate": 2.2166412795125665e-05, + "loss": 0.0298, + "step": 14720 + }, + { + "epoch": 16.51, + "learning_rate": 2.216070068545316e-05, + "loss": 0.0431, + "step": 14730 + }, + { + "epoch": 16.52, + "learning_rate": 2.2154988575780655e-05, + "loss": 0.0307, + "step": 14740 + }, + { + "epoch": 16.54, + "learning_rate": 2.214927646610815e-05, + "loss": 0.0278, + "step": 14750 + }, + { + "epoch": 16.55, + "learning_rate": 2.2143564356435643e-05, + "loss": 0.0309, + "step": 14760 + }, + { + "epoch": 16.56, + "learning_rate": 2.213785224676314e-05, + "loss": 0.0377, + "step": 14770 + }, + { + "epoch": 16.57, + "learning_rate": 2.2132140137090633e-05, + "loss": 0.044, + "step": 14780 + }, + { + "epoch": 16.58, + "learning_rate": 2.2126428027418127e-05, + "loss": 0.0319, + "step": 14790 + }, + { + "epoch": 16.59, + "learning_rate": 2.2120715917745623e-05, + "loss": 0.0297, + "step": 14800 + }, + { + "epoch": 16.6, + "learning_rate": 2.2115003808073117e-05, + "loss": 0.0319, + "step": 14810 + }, + { + "epoch": 16.61, + "learning_rate": 2.210929169840061e-05, + "loss": 0.0412, + "step": 14820 + }, + { + "epoch": 16.63, + "learning_rate": 2.2103579588728104e-05, + "loss": 0.0416, + "step": 14830 + }, + { + "epoch": 16.64, + "learning_rate": 2.2097867479055598e-05, + "loss": 0.032, + "step": 14840 + }, + { + "epoch": 16.65, + "learning_rate": 2.209215536938309e-05, + "loss": 0.0318, + "step": 14850 + }, + { + "epoch": 16.66, + "learning_rate": 2.2086443259710585e-05, + "loss": 0.0281, + "step": 14860 + }, + { + "epoch": 16.67, + "learning_rate": 2.208073115003808e-05, + "loss": 0.0314, + "step": 14870 + }, + { + "epoch": 16.68, + "learning_rate": 2.2075019040365575e-05, + "loss": 0.0373, + "step": 14880 + }, + { + "epoch": 16.69, + "learning_rate": 2.206930693069307e-05, + "loss": 0.0322, + "step": 14890 + }, + { + "epoch": 16.7, + "learning_rate": 2.2063594821020566e-05, + "loss": 0.0318, + "step": 14900 + }, + { + "epoch": 16.72, + "learning_rate": 2.205788271134806e-05, + "loss": 0.0312, + "step": 14910 + }, + { + "epoch": 16.73, + "learning_rate": 2.2052170601675553e-05, + "loss": 0.033, + "step": 14920 + }, + { + "epoch": 16.74, + "learning_rate": 2.2046458492003046e-05, + "loss": 0.0364, + "step": 14930 + }, + { + "epoch": 16.75, + "learning_rate": 2.2040746382330543e-05, + "loss": 0.0307, + "step": 14940 + }, + { + "epoch": 16.76, + "learning_rate": 2.2035034272658037e-05, + "loss": 0.0309, + "step": 14950 + }, + { + "epoch": 16.77, + "learning_rate": 2.202932216298553e-05, + "loss": 0.0255, + "step": 14960 + }, + { + "epoch": 16.78, + "learning_rate": 2.2023610053313027e-05, + "loss": 0.0313, + "step": 14970 + }, + { + "epoch": 16.79, + "learning_rate": 2.2017897943640517e-05, + "loss": 0.0411, + "step": 14980 + }, + { + "epoch": 16.8, + "learning_rate": 2.201218583396801e-05, + "loss": 0.0307, + "step": 14990 + }, + { + "epoch": 16.82, + "learning_rate": 2.2006473724295504e-05, + "loss": 0.0286, + "step": 15000 + }, + { + "epoch": 16.83, + "learning_rate": 2.2000761614623e-05, + "loss": 0.0294, + "step": 15010 + }, + { + "epoch": 16.84, + "learning_rate": 2.1995049504950495e-05, + "loss": 0.0352, + "step": 15020 + }, + { + "epoch": 16.85, + "learning_rate": 2.198933739527799e-05, + "loss": 0.0375, + "step": 15030 + }, + { + "epoch": 16.86, + "learning_rate": 2.1983625285605485e-05, + "loss": 0.0292, + "step": 15040 + }, + { + "epoch": 16.87, + "learning_rate": 2.197791317593298e-05, + "loss": 0.0278, + "step": 15050 + }, + { + "epoch": 16.88, + "learning_rate": 2.1972201066260473e-05, + "loss": 0.0293, + "step": 15060 + }, + { + "epoch": 16.89, + "learning_rate": 2.196648895658797e-05, + "loss": 0.0351, + "step": 15070 + }, + { + "epoch": 16.91, + "learning_rate": 2.1960776846915463e-05, + "loss": 0.0391, + "step": 15080 + }, + { + "epoch": 16.92, + "learning_rate": 2.1955064737242957e-05, + "loss": 0.0264, + "step": 15090 + }, + { + "epoch": 16.93, + "learning_rate": 2.194935262757045e-05, + "loss": 0.0329, + "step": 15100 + }, + { + "epoch": 16.94, + "learning_rate": 2.1943640517897944e-05, + "loss": 0.0293, + "step": 15110 + }, + { + "epoch": 16.95, + "learning_rate": 2.1937928408225437e-05, + "loss": 0.0289, + "step": 15120 + }, + { + "epoch": 16.96, + "learning_rate": 2.193221629855293e-05, + "loss": 0.0363, + "step": 15130 + }, + { + "epoch": 16.97, + "learning_rate": 2.1926504188880428e-05, + "loss": 0.0298, + "step": 15140 + }, + { + "epoch": 16.98, + "learning_rate": 2.192079207920792e-05, + "loss": 0.0288, + "step": 15150 + }, + { + "epoch": 17.0, + "learning_rate": 2.1915079969535415e-05, + "loss": 0.0274, + "step": 15160 + }, + { + "epoch": 17.0, + "eval_cer": 1.156931793079252, + "eval_loss": 0.09139201790094376, + "eval_runtime": 484.2077, + "eval_samples_per_second": 10.993, + "eval_steps_per_second": 1.375, + "eval_wer": 3.149277903830935, + "step": 15164 + }, + { + "epoch": 17.01, + "learning_rate": 2.1909367859862908e-05, + "loss": 0.0459, + "step": 15170 + }, + { + "epoch": 17.02, + "learning_rate": 2.1903655750190405e-05, + "loss": 0.0313, + "step": 15180 + }, + { + "epoch": 17.03, + "learning_rate": 2.18979436405179e-05, + "loss": 0.0311, + "step": 15190 + }, + { + "epoch": 17.04, + "learning_rate": 2.1892231530845392e-05, + "loss": 0.0285, + "step": 15200 + }, + { + "epoch": 17.05, + "learning_rate": 2.188651942117289e-05, + "loss": 0.0285, + "step": 15210 + }, + { + "epoch": 17.06, + "learning_rate": 2.1880807311500383e-05, + "loss": 0.0377, + "step": 15220 + }, + { + "epoch": 17.07, + "learning_rate": 2.1875095201827876e-05, + "loss": 0.0272, + "step": 15230 + }, + { + "epoch": 17.09, + "learning_rate": 2.186938309215537e-05, + "loss": 0.0254, + "step": 15240 + }, + { + "epoch": 17.1, + "learning_rate": 2.1863670982482863e-05, + "loss": 0.0297, + "step": 15250 + }, + { + "epoch": 17.11, + "learning_rate": 2.1857958872810357e-05, + "loss": 0.024, + "step": 15260 + }, + { + "epoch": 17.12, + "learning_rate": 2.185224676313785e-05, + "loss": 0.0317, + "step": 15270 + }, + { + "epoch": 17.13, + "learning_rate": 2.1846534653465347e-05, + "loss": 0.0312, + "step": 15280 + }, + { + "epoch": 17.14, + "learning_rate": 2.184082254379284e-05, + "loss": 0.0261, + "step": 15290 + }, + { + "epoch": 17.15, + "learning_rate": 2.1835110434120334e-05, + "loss": 0.0297, + "step": 15300 + }, + { + "epoch": 17.16, + "learning_rate": 2.182939832444783e-05, + "loss": 0.0273, + "step": 15310 + }, + { + "epoch": 17.17, + "learning_rate": 2.1823686214775325e-05, + "loss": 0.0373, + "step": 15320 + }, + { + "epoch": 17.19, + "learning_rate": 2.181797410510282e-05, + "loss": 0.0278, + "step": 15330 + }, + { + "epoch": 17.2, + "learning_rate": 2.1812261995430315e-05, + "loss": 0.0267, + "step": 15340 + }, + { + "epoch": 17.21, + "learning_rate": 2.180654988575781e-05, + "loss": 0.0261, + "step": 15350 + }, + { + "epoch": 17.22, + "learning_rate": 2.1800837776085303e-05, + "loss": 0.0262, + "step": 15360 + }, + { + "epoch": 17.23, + "learning_rate": 2.1795125666412793e-05, + "loss": 0.0395, + "step": 15370 + }, + { + "epoch": 17.24, + "learning_rate": 2.178941355674029e-05, + "loss": 0.0296, + "step": 15380 + }, + { + "epoch": 17.25, + "learning_rate": 2.1783701447067783e-05, + "loss": 0.03, + "step": 15390 + }, + { + "epoch": 17.26, + "learning_rate": 2.1777989337395277e-05, + "loss": 0.0282, + "step": 15400 + }, + { + "epoch": 17.28, + "learning_rate": 2.1772277227722774e-05, + "loss": 0.0267, + "step": 15410 + }, + { + "epoch": 17.29, + "learning_rate": 2.1766565118050267e-05, + "loss": 0.0371, + "step": 15420 + }, + { + "epoch": 17.3, + "learning_rate": 2.176085300837776e-05, + "loss": 0.0263, + "step": 15430 + }, + { + "epoch": 17.31, + "learning_rate": 2.1755140898705254e-05, + "loss": 0.0269, + "step": 15440 + }, + { + "epoch": 17.32, + "learning_rate": 2.174942878903275e-05, + "loss": 0.0252, + "step": 15450 + }, + { + "epoch": 17.33, + "learning_rate": 2.1743716679360245e-05, + "loss": 0.0269, + "step": 15460 + }, + { + "epoch": 17.34, + "learning_rate": 2.173857578065499e-05, + "loss": 0.0351, + "step": 15470 + }, + { + "epoch": 17.35, + "learning_rate": 2.1732863670982484e-05, + "loss": 0.029, + "step": 15480 + }, + { + "epoch": 17.37, + "learning_rate": 2.1727151561309978e-05, + "loss": 0.0273, + "step": 15490 + }, + { + "epoch": 17.38, + "learning_rate": 2.172143945163747e-05, + "loss": 0.0273, + "step": 15500 + }, + { + "epoch": 17.39, + "learning_rate": 2.1715727341964968e-05, + "loss": 0.0288, + "step": 15510 + }, + { + "epoch": 17.4, + "learning_rate": 2.1710015232292462e-05, + "loss": 0.0394, + "step": 15520 + }, + { + "epoch": 17.41, + "learning_rate": 2.1704303122619955e-05, + "loss": 0.0308, + "step": 15530 + }, + { + "epoch": 17.42, + "learning_rate": 2.1698591012947452e-05, + "loss": 0.0304, + "step": 15540 + }, + { + "epoch": 17.43, + "learning_rate": 2.1692878903274942e-05, + "loss": 0.0263, + "step": 15550 + }, + { + "epoch": 17.44, + "learning_rate": 2.1687166793602436e-05, + "loss": 0.0279, + "step": 15560 + }, + { + "epoch": 17.46, + "learning_rate": 2.168145468392993e-05, + "loss": 0.0391, + "step": 15570 + }, + { + "epoch": 17.47, + "learning_rate": 2.1675742574257426e-05, + "loss": 0.0326, + "step": 15580 + }, + { + "epoch": 17.48, + "learning_rate": 2.167003046458492e-05, + "loss": 0.0277, + "step": 15590 + }, + { + "epoch": 17.49, + "learning_rate": 2.1664318354912413e-05, + "loss": 0.0269, + "step": 15600 + }, + { + "epoch": 17.5, + "learning_rate": 2.165860624523991e-05, + "loss": 0.025, + "step": 15610 + }, + { + "epoch": 17.51, + "learning_rate": 2.1652894135567404e-05, + "loss": 0.0388, + "step": 15620 + }, + { + "epoch": 17.52, + "learning_rate": 2.1647182025894897e-05, + "loss": 0.0283, + "step": 15630 + }, + { + "epoch": 17.53, + "learning_rate": 2.1641469916222394e-05, + "loss": 0.0303, + "step": 15640 + }, + { + "epoch": 17.54, + "learning_rate": 2.1635757806549888e-05, + "loss": 0.0256, + "step": 15650 + }, + { + "epoch": 17.56, + "learning_rate": 2.163004569687738e-05, + "loss": 0.0251, + "step": 15660 + }, + { + "epoch": 17.57, + "learning_rate": 2.1624333587204875e-05, + "loss": 0.0345, + "step": 15670 + }, + { + "epoch": 17.58, + "learning_rate": 2.161862147753237e-05, + "loss": 0.0308, + "step": 15680 + }, + { + "epoch": 17.59, + "learning_rate": 2.1612909367859862e-05, + "loss": 0.0301, + "step": 15690 + }, + { + "epoch": 17.6, + "learning_rate": 2.1607197258187356e-05, + "loss": 0.029, + "step": 15700 + }, + { + "epoch": 17.61, + "learning_rate": 2.1601485148514853e-05, + "loss": 0.0275, + "step": 15710 + }, + { + "epoch": 17.62, + "learning_rate": 2.1595773038842346e-05, + "loss": 0.0394, + "step": 15720 + }, + { + "epoch": 17.63, + "learning_rate": 2.159006092916984e-05, + "loss": 0.0296, + "step": 15730 + }, + { + "epoch": 17.65, + "learning_rate": 2.1584348819497333e-05, + "loss": 0.0289, + "step": 15740 + }, + { + "epoch": 17.66, + "learning_rate": 2.157863670982483e-05, + "loss": 0.0262, + "step": 15750 + }, + { + "epoch": 17.67, + "learning_rate": 2.1572924600152324e-05, + "loss": 0.0243, + "step": 15760 + }, + { + "epoch": 17.68, + "learning_rate": 2.1567212490479817e-05, + "loss": 0.0365, + "step": 15770 + }, + { + "epoch": 17.69, + "learning_rate": 2.1561500380807314e-05, + "loss": 0.03, + "step": 15780 + }, + { + "epoch": 17.7, + "learning_rate": 2.1555788271134808e-05, + "loss": 0.0264, + "step": 15790 + }, + { + "epoch": 17.71, + "learning_rate": 2.15500761614623e-05, + "loss": 0.0291, + "step": 15800 + }, + { + "epoch": 17.72, + "learning_rate": 2.1544364051789795e-05, + "loss": 0.0262, + "step": 15810 + }, + { + "epoch": 17.74, + "learning_rate": 2.153865194211729e-05, + "loss": 0.0418, + "step": 15820 + }, + { + "epoch": 17.75, + "learning_rate": 2.1532939832444782e-05, + "loss": 0.029, + "step": 15830 + }, + { + "epoch": 17.76, + "learning_rate": 2.1527227722772275e-05, + "loss": 0.0291, + "step": 15840 + }, + { + "epoch": 17.77, + "learning_rate": 2.1521515613099772e-05, + "loss": 0.028, + "step": 15850 + }, + { + "epoch": 17.78, + "learning_rate": 2.1515803503427266e-05, + "loss": 0.0253, + "step": 15860 + }, + { + "epoch": 17.79, + "learning_rate": 2.151009139375476e-05, + "loss": 0.0378, + "step": 15870 + }, + { + "epoch": 17.8, + "learning_rate": 2.1504379284082256e-05, + "loss": 0.0275, + "step": 15880 + }, + { + "epoch": 17.81, + "learning_rate": 2.149866717440975e-05, + "loss": 0.0295, + "step": 15890 + }, + { + "epoch": 17.83, + "learning_rate": 2.1492955064737243e-05, + "loss": 0.0252, + "step": 15900 + }, + { + "epoch": 17.84, + "learning_rate": 2.148724295506474e-05, + "loss": 0.0276, + "step": 15910 + }, + { + "epoch": 17.85, + "learning_rate": 2.1481530845392234e-05, + "loss": 0.0463, + "step": 15920 + }, + { + "epoch": 17.86, + "learning_rate": 2.1475818735719727e-05, + "loss": 0.0283, + "step": 15930 + }, + { + "epoch": 17.87, + "learning_rate": 2.147010662604722e-05, + "loss": 0.0276, + "step": 15940 + }, + { + "epoch": 17.88, + "learning_rate": 2.1464394516374715e-05, + "loss": 0.029, + "step": 15950 + }, + { + "epoch": 17.89, + "learning_rate": 2.1458682406702208e-05, + "loss": 0.0274, + "step": 15960 + }, + { + "epoch": 17.9, + "learning_rate": 2.14529702970297e-05, + "loss": 0.0397, + "step": 15970 + }, + { + "epoch": 17.91, + "learning_rate": 2.14472581873572e-05, + "loss": 0.0308, + "step": 15980 + }, + { + "epoch": 17.93, + "learning_rate": 2.1441546077684692e-05, + "loss": 0.0261, + "step": 15990 + }, + { + "epoch": 17.94, + "learning_rate": 2.1435833968012186e-05, + "loss": 0.028, + "step": 16000 + }, + { + "epoch": 17.95, + "learning_rate": 2.143012185833968e-05, + "loss": 0.0296, + "step": 16010 + }, + { + "epoch": 17.96, + "learning_rate": 2.1424409748667176e-05, + "loss": 0.0411, + "step": 16020 + }, + { + "epoch": 17.97, + "learning_rate": 2.141869763899467e-05, + "loss": 0.027, + "step": 16030 + }, + { + "epoch": 17.98, + "learning_rate": 2.1412985529322163e-05, + "loss": 0.0256, + "step": 16040 + }, + { + "epoch": 17.99, + "learning_rate": 2.140727341964966e-05, + "loss": 0.0267, + "step": 16050 + }, + { + "epoch": 18.0, + "eval_cer": 1.1627782862043554, + "eval_loss": 0.09293551743030548, + "eval_runtime": 480.09, + "eval_samples_per_second": 11.088, + "eval_steps_per_second": 1.387, + "eval_wer": 3.1501402651761135, + "step": 16056 + }, + { + "epoch": 18.0, + "learning_rate": 2.1401561309977154e-05, + "loss": 0.0324, + "step": 16060 + }, + { + "epoch": 18.02, + "learning_rate": 2.1395849200304647e-05, + "loss": 0.0272, + "step": 16070 + }, + { + "epoch": 18.03, + "learning_rate": 2.139013709063214e-05, + "loss": 0.0297, + "step": 16080 + }, + { + "epoch": 18.04, + "learning_rate": 2.1384424980959634e-05, + "loss": 0.0274, + "step": 16090 + }, + { + "epoch": 18.05, + "learning_rate": 2.1378712871287128e-05, + "loss": 0.0247, + "step": 16100 + }, + { + "epoch": 18.06, + "learning_rate": 2.137300076161462e-05, + "loss": 0.0383, + "step": 16110 + }, + { + "epoch": 18.07, + "learning_rate": 2.136728865194212e-05, + "loss": 0.0301, + "step": 16120 + }, + { + "epoch": 18.08, + "learning_rate": 2.1361576542269612e-05, + "loss": 0.025, + "step": 16130 + }, + { + "epoch": 18.09, + "learning_rate": 2.1355864432597105e-05, + "loss": 0.0255, + "step": 16140 + }, + { + "epoch": 18.11, + "learning_rate": 2.1350152322924602e-05, + "loss": 0.0281, + "step": 16150 + }, + { + "epoch": 18.12, + "learning_rate": 2.1344440213252096e-05, + "loss": 0.0325, + "step": 16160 + }, + { + "epoch": 18.13, + "learning_rate": 2.133872810357959e-05, + "loss": 0.0264, + "step": 16170 + }, + { + "epoch": 18.14, + "learning_rate": 2.1333015993907083e-05, + "loss": 0.0241, + "step": 16180 + }, + { + "epoch": 18.15, + "learning_rate": 2.132730388423458e-05, + "loss": 0.0261, + "step": 16190 + }, + { + "epoch": 18.16, + "learning_rate": 2.1321591774562073e-05, + "loss": 0.026, + "step": 16200 + }, + { + "epoch": 18.17, + "learning_rate": 2.1315879664889564e-05, + "loss": 0.0385, + "step": 16210 + }, + { + "epoch": 18.18, + "learning_rate": 2.131016755521706e-05, + "loss": 0.0284, + "step": 16220 + }, + { + "epoch": 18.2, + "learning_rate": 2.1304455445544554e-05, + "loss": 0.0295, + "step": 16230 + }, + { + "epoch": 18.21, + "learning_rate": 2.1298743335872048e-05, + "loss": 0.0252, + "step": 16240 + }, + { + "epoch": 18.22, + "learning_rate": 2.1293031226199545e-05, + "loss": 0.028, + "step": 16250 + }, + { + "epoch": 18.23, + "learning_rate": 2.1287319116527038e-05, + "loss": 0.0375, + "step": 16260 + }, + { + "epoch": 18.24, + "learning_rate": 2.128160700685453e-05, + "loss": 0.0269, + "step": 16270 + }, + { + "epoch": 18.25, + "learning_rate": 2.1275894897182025e-05, + "loss": 0.0273, + "step": 16280 + }, + { + "epoch": 18.26, + "learning_rate": 2.1270182787509522e-05, + "loss": 0.0227, + "step": 16290 + }, + { + "epoch": 18.27, + "learning_rate": 2.1264470677837016e-05, + "loss": 0.0247, + "step": 16300 + }, + { + "epoch": 18.28, + "learning_rate": 2.125875856816451e-05, + "loss": 0.0359, + "step": 16310 + }, + { + "epoch": 18.3, + "learning_rate": 2.1253046458492006e-05, + "loss": 0.0269, + "step": 16320 + }, + { + "epoch": 18.31, + "learning_rate": 2.12473343488195e-05, + "loss": 0.0258, + "step": 16330 + }, + { + "epoch": 18.32, + "learning_rate": 2.124162223914699e-05, + "loss": 0.0257, + "step": 16340 + }, + { + "epoch": 18.33, + "learning_rate": 2.1235910129474487e-05, + "loss": 0.0237, + "step": 16350 + }, + { + "epoch": 18.34, + "learning_rate": 2.123019801980198e-05, + "loss": 0.042, + "step": 16360 + }, + { + "epoch": 18.35, + "learning_rate": 2.1224485910129474e-05, + "loss": 0.0261, + "step": 16370 + }, + { + "epoch": 18.36, + "learning_rate": 2.1218773800456967e-05, + "loss": 0.0254, + "step": 16380 + }, + { + "epoch": 18.37, + "learning_rate": 2.1213061690784464e-05, + "loss": 0.0265, + "step": 16390 + }, + { + "epoch": 18.39, + "learning_rate": 2.1207349581111958e-05, + "loss": 0.0272, + "step": 16400 + }, + { + "epoch": 18.4, + "learning_rate": 2.120163747143945e-05, + "loss": 0.0323, + "step": 16410 + }, + { + "epoch": 18.41, + "learning_rate": 2.1195925361766948e-05, + "loss": 0.0266, + "step": 16420 + }, + { + "epoch": 18.42, + "learning_rate": 2.1190213252094442e-05, + "loss": 0.0266, + "step": 16430 + }, + { + "epoch": 18.43, + "learning_rate": 2.1184501142421935e-05, + "loss": 0.0266, + "step": 16440 + }, + { + "epoch": 18.44, + "learning_rate": 2.117878903274943e-05, + "loss": 0.0235, + "step": 16450 + }, + { + "epoch": 18.45, + "learning_rate": 2.1173076923076926e-05, + "loss": 0.0348, + "step": 16460 + }, + { + "epoch": 18.46, + "learning_rate": 2.116736481340442e-05, + "loss": 0.0246, + "step": 16470 + }, + { + "epoch": 18.48, + "learning_rate": 2.116165270373191e-05, + "loss": 0.0273, + "step": 16480 + }, + { + "epoch": 18.49, + "learning_rate": 2.1155940594059406e-05, + "loss": 0.0275, + "step": 16490 + }, + { + "epoch": 18.5, + "learning_rate": 2.11502284843869e-05, + "loss": 0.0251, + "step": 16500 + }, + { + "epoch": 18.51, + "learning_rate": 2.1144516374714394e-05, + "loss": 0.0359, + "step": 16510 + }, + { + "epoch": 18.52, + "learning_rate": 2.113880426504189e-05, + "loss": 0.0254, + "step": 16520 + }, + { + "epoch": 18.53, + "learning_rate": 2.1133092155369384e-05, + "loss": 0.0272, + "step": 16530 + }, + { + "epoch": 18.54, + "learning_rate": 2.1127380045696878e-05, + "loss": 0.0255, + "step": 16540 + }, + { + "epoch": 18.55, + "learning_rate": 2.112166793602437e-05, + "loss": 0.0257, + "step": 16550 + }, + { + "epoch": 18.57, + "learning_rate": 2.1115955826351868e-05, + "loss": 0.0346, + "step": 16560 + }, + { + "epoch": 18.58, + "learning_rate": 2.111024371667936e-05, + "loss": 0.0272, + "step": 16570 + }, + { + "epoch": 18.59, + "learning_rate": 2.1104531607006855e-05, + "loss": 0.0266, + "step": 16580 + }, + { + "epoch": 18.6, + "learning_rate": 2.1098819497334352e-05, + "loss": 0.0272, + "step": 16590 + }, + { + "epoch": 18.61, + "learning_rate": 2.1093107387661846e-05, + "loss": 0.026, + "step": 16600 + }, + { + "epoch": 18.62, + "learning_rate": 2.1087395277989336e-05, + "loss": 0.0339, + "step": 16610 + }, + { + "epoch": 18.63, + "learning_rate": 2.108168316831683e-05, + "loss": 0.0269, + "step": 16620 + }, + { + "epoch": 18.64, + "learning_rate": 2.1075971058644326e-05, + "loss": 0.0281, + "step": 16630 + }, + { + "epoch": 18.65, + "learning_rate": 2.107025894897182e-05, + "loss": 0.027, + "step": 16640 + }, + { + "epoch": 18.67, + "learning_rate": 2.1064546839299313e-05, + "loss": 0.024, + "step": 16650 + }, + { + "epoch": 18.68, + "learning_rate": 2.105883472962681e-05, + "loss": 0.034, + "step": 16660 + }, + { + "epoch": 18.69, + "learning_rate": 2.1053122619954304e-05, + "loss": 0.0271, + "step": 16670 + }, + { + "epoch": 18.7, + "learning_rate": 2.1047410510281797e-05, + "loss": 0.0287, + "step": 16680 + }, + { + "epoch": 18.71, + "learning_rate": 2.1041698400609294e-05, + "loss": 0.025, + "step": 16690 + }, + { + "epoch": 18.72, + "learning_rate": 2.1035986290936788e-05, + "loss": 0.0243, + "step": 16700 + }, + { + "epoch": 18.73, + "learning_rate": 2.103027418126428e-05, + "loss": 0.0293, + "step": 16710 + }, + { + "epoch": 18.74, + "learning_rate": 2.1024562071591775e-05, + "loss": 0.027, + "step": 16720 + }, + { + "epoch": 18.76, + "learning_rate": 2.1018849961919272e-05, + "loss": 0.0258, + "step": 16730 + }, + { + "epoch": 18.77, + "learning_rate": 2.1013137852246762e-05, + "loss": 0.0237, + "step": 16740 + }, + { + "epoch": 18.78, + "learning_rate": 2.1007425742574256e-05, + "loss": 0.0214, + "step": 16750 + }, + { + "epoch": 18.79, + "learning_rate": 2.1001713632901752e-05, + "loss": 0.0356, + "step": 16760 + }, + { + "epoch": 18.8, + "learning_rate": 2.0996001523229246e-05, + "loss": 0.0277, + "step": 16770 + }, + { + "epoch": 18.81, + "learning_rate": 2.099028941355674e-05, + "loss": 0.0272, + "step": 16780 + }, + { + "epoch": 18.82, + "learning_rate": 2.0984577303884233e-05, + "loss": 0.023, + "step": 16790 + }, + { + "epoch": 18.83, + "learning_rate": 2.097886519421173e-05, + "loss": 0.0253, + "step": 16800 + }, + { + "epoch": 18.85, + "learning_rate": 2.0973153084539224e-05, + "loss": 0.0326, + "step": 16810 + }, + { + "epoch": 18.86, + "learning_rate": 2.0967440974866717e-05, + "loss": 0.03, + "step": 16820 + }, + { + "epoch": 18.87, + "learning_rate": 2.0961728865194214e-05, + "loss": 0.0236, + "step": 16830 + }, + { + "epoch": 18.88, + "learning_rate": 2.0956016755521708e-05, + "loss": 0.0207, + "step": 16840 + }, + { + "epoch": 18.89, + "learning_rate": 2.09503046458492e-05, + "loss": 0.0253, + "step": 16850 + }, + { + "epoch": 18.9, + "learning_rate": 2.0944592536176698e-05, + "loss": 0.0348, + "step": 16860 + }, + { + "epoch": 18.91, + "learning_rate": 2.0938880426504188e-05, + "loss": 0.0281, + "step": 16870 + }, + { + "epoch": 18.92, + "learning_rate": 2.0933168316831682e-05, + "loss": 0.0248, + "step": 16880 + }, + { + "epoch": 18.93, + "learning_rate": 2.0927456207159175e-05, + "loss": 0.0298, + "step": 16890 + }, + { + "epoch": 18.95, + "learning_rate": 2.0921744097486672e-05, + "loss": 0.0229, + "step": 16900 + }, + { + "epoch": 18.96, + "learning_rate": 2.0916031987814166e-05, + "loss": 0.0364, + "step": 16910 + }, + { + "epoch": 18.97, + "learning_rate": 2.091031987814166e-05, + "loss": 0.0276, + "step": 16920 + }, + { + "epoch": 18.98, + "learning_rate": 2.0904607768469156e-05, + "loss": 0.0304, + "step": 16930 + }, + { + "epoch": 18.99, + "learning_rate": 2.089889565879665e-05, + "loss": 0.0277, + "step": 16940 + }, + { + "epoch": 19.0, + "eval_cer": 1.1607502519960127, + "eval_loss": 0.0943588986992836, + "eval_runtime": 475.7712, + "eval_samples_per_second": 11.188, + "eval_steps_per_second": 1.4, + "eval_wer": 3.1508553940965056, + "step": 16948 } ], "max_steps": 53520, "num_train_epochs": 60, - "total_flos": 2.9639756729474507e+19, + "total_flos": 6.257282321655395e+19, "trial_name": null, "trial_params": null }