utakumi's picture
End of training
18e5c1e verified
{
"best_metric": 0.6012922525405884,
"best_model_checkpoint": "./wav2vec2-large-xlsr-53-common_voice-ja-demo-kana-only/checkpoint-2600",
"epoch": 15.0,
"eval_steps": 100,
"global_step": 5640,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.26595744680851063,
"eval_cer": 0.9998627637760951,
"eval_loss": 6.830887317657471,
"eval_runtime": 235.9339,
"eval_samples_per_second": 21.027,
"eval_steps_per_second": 2.632,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.5319148936170213,
"eval_cer": 0.9998627637760951,
"eval_loss": 4.129940986633301,
"eval_runtime": 235.1597,
"eval_samples_per_second": 21.096,
"eval_steps_per_second": 2.641,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.7978723404255319,
"eval_cer": 0.9869302678528182,
"eval_loss": 3.993030309677124,
"eval_runtime": 234.1893,
"eval_samples_per_second": 21.184,
"eval_steps_per_second": 2.652,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 1.0638297872340425,
"eval_cer": 0.5876132198847216,
"eval_loss": 2.0400278568267822,
"eval_runtime": 237.5888,
"eval_samples_per_second": 20.881,
"eval_steps_per_second": 2.614,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 1.3297872340425532,
"grad_norm": 3.4516775608062744,
"learning_rate": 0.00029759999999999997,
"loss": 7.1041,
"step": 500
},
{
"epoch": 1.3297872340425532,
"eval_cer": 0.43090559762339153,
"eval_loss": 1.0610116720199585,
"eval_runtime": 241.7004,
"eval_samples_per_second": 20.525,
"eval_steps_per_second": 2.569,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 1.5957446808510638,
"eval_cer": 0.39546636097970517,
"eval_loss": 0.8836929798126221,
"eval_runtime": 237.4265,
"eval_samples_per_second": 20.895,
"eval_steps_per_second": 2.616,
"eval_wer": 1.0,
"step": 600
},
{
"epoch": 1.8617021276595744,
"eval_cer": 0.3790545231444855,
"eval_loss": 0.7705639004707336,
"eval_runtime": 238.309,
"eval_samples_per_second": 20.818,
"eval_steps_per_second": 2.606,
"eval_wer": 0.9997984277363435,
"step": 700
},
{
"epoch": 2.127659574468085,
"eval_cer": 0.3815893569272002,
"eval_loss": 0.766192615032196,
"eval_runtime": 235.61,
"eval_samples_per_second": 21.056,
"eval_steps_per_second": 2.636,
"eval_wer": 1.0,
"step": 800
},
{
"epoch": 2.393617021276596,
"eval_cer": 0.37896572323489996,
"eval_loss": 0.7620505094528198,
"eval_runtime": 237.8852,
"eval_samples_per_second": 20.855,
"eval_steps_per_second": 2.611,
"eval_wer": 1.0,
"step": 900
},
{
"epoch": 2.6595744680851063,
"grad_norm": 2.764026165008545,
"learning_rate": 0.00027105058365758754,
"loss": 0.803,
"step": 1000
},
{
"epoch": 2.6595744680851063,
"eval_cer": 0.36259424899494647,
"eval_loss": 0.6968890428543091,
"eval_runtime": 238.5042,
"eval_samples_per_second": 20.8,
"eval_steps_per_second": 2.604,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 2.925531914893617,
"eval_cer": 0.3572904725769734,
"eval_loss": 0.6735507249832153,
"eval_runtime": 242.6929,
"eval_samples_per_second": 20.441,
"eval_steps_per_second": 2.559,
"eval_wer": 1.0,
"step": 1100
},
{
"epoch": 3.1914893617021276,
"eval_cer": 0.3543762209987568,
"eval_loss": 0.6823433637619019,
"eval_runtime": 250.863,
"eval_samples_per_second": 19.776,
"eval_steps_per_second": 2.475,
"eval_wer": 0.9997984277363435,
"step": 1200
},
{
"epoch": 3.4574468085106385,
"eval_cer": 0.3460209567786622,
"eval_loss": 0.6359558701515198,
"eval_runtime": 237.6152,
"eval_samples_per_second": 20.878,
"eval_steps_per_second": 2.613,
"eval_wer": 1.0,
"step": 1300
},
{
"epoch": 3.723404255319149,
"eval_cer": 0.34429339490127064,
"eval_loss": 0.6503622531890869,
"eval_runtime": 235.7171,
"eval_samples_per_second": 21.046,
"eval_steps_per_second": 2.635,
"eval_wer": 1.0,
"step": 1400
},
{
"epoch": 3.9893617021276597,
"grad_norm": 1.9848517179489136,
"learning_rate": 0.00024186770428015562,
"loss": 0.5675,
"step": 1500
},
{
"epoch": 3.9893617021276597,
"eval_cer": 0.34139528876116054,
"eval_loss": 0.6246533393859863,
"eval_runtime": 239.1195,
"eval_samples_per_second": 20.747,
"eval_steps_per_second": 2.597,
"eval_wer": 1.0,
"step": 1500
},
{
"epoch": 4.25531914893617,
"eval_cer": 0.3424851058333468,
"eval_loss": 0.6397080421447754,
"eval_runtime": 242.7125,
"eval_samples_per_second": 20.44,
"eval_steps_per_second": 2.559,
"eval_wer": 0.9997984277363435,
"step": 1600
},
{
"epoch": 4.5212765957446805,
"eval_cer": 0.3439462679819817,
"eval_loss": 0.6589427590370178,
"eval_runtime": 243.7764,
"eval_samples_per_second": 20.351,
"eval_steps_per_second": 2.547,
"eval_wer": 1.0,
"step": 1700
},
{
"epoch": 4.787234042553192,
"eval_cer": 0.3449472851445824,
"eval_loss": 0.6345330476760864,
"eval_runtime": 237.3086,
"eval_samples_per_second": 20.905,
"eval_steps_per_second": 2.617,
"eval_wer": 1.0,
"step": 1800
},
{
"epoch": 5.053191489361702,
"eval_cer": 0.3379805286016436,
"eval_loss": 0.652181088924408,
"eval_runtime": 240.2379,
"eval_samples_per_second": 20.65,
"eval_steps_per_second": 2.585,
"eval_wer": 0.999596855472687,
"step": 1900
},
{
"epoch": 5.319148936170213,
"grad_norm": 2.756148099899292,
"learning_rate": 0.00021268482490272372,
"loss": 0.4421,
"step": 2000
},
{
"epoch": 5.319148936170213,
"eval_cer": 0.3371732566963205,
"eval_loss": 0.6292724609375,
"eval_runtime": 240.5588,
"eval_samples_per_second": 20.623,
"eval_steps_per_second": 2.581,
"eval_wer": 1.0,
"step": 2000
},
{
"epoch": 5.585106382978723,
"eval_cer": 0.3342428596799974,
"eval_loss": 0.6095719337463379,
"eval_runtime": 241.1797,
"eval_samples_per_second": 20.57,
"eval_steps_per_second": 2.575,
"eval_wer": 1.0,
"step": 2100
},
{
"epoch": 5.851063829787234,
"eval_cer": 0.3320712982546781,
"eval_loss": 0.6107529401779175,
"eval_runtime": 238.2079,
"eval_samples_per_second": 20.826,
"eval_steps_per_second": 2.607,
"eval_wer": 1.0,
"step": 2200
},
{
"epoch": 6.117021276595745,
"eval_cer": 0.3354295493808224,
"eval_loss": 0.6200144290924072,
"eval_runtime": 235.9894,
"eval_samples_per_second": 21.022,
"eval_steps_per_second": 2.631,
"eval_wer": 1.0,
"step": 2300
},
{
"epoch": 6.382978723404255,
"eval_cer": 0.33407333257987953,
"eval_loss": 0.6413326263427734,
"eval_runtime": 236.6024,
"eval_samples_per_second": 20.968,
"eval_steps_per_second": 2.625,
"eval_wer": 1.0,
"step": 2400
},
{
"epoch": 6.648936170212766,
"grad_norm": 4.183356761932373,
"learning_rate": 0.00018350194552529183,
"loss": 0.3699,
"step": 2500
},
{
"epoch": 6.648936170212766,
"eval_cer": 0.3359462034002293,
"eval_loss": 0.6303403973579407,
"eval_runtime": 237.052,
"eval_samples_per_second": 20.928,
"eval_steps_per_second": 2.62,
"eval_wer": 0.999596855472687,
"step": 2500
},
{
"epoch": 6.914893617021277,
"eval_cer": 0.3307796632061611,
"eval_loss": 0.6012922525405884,
"eval_runtime": 233.4251,
"eval_samples_per_second": 21.253,
"eval_steps_per_second": 2.66,
"eval_wer": 1.0,
"step": 2600
},
{
"epoch": 7.180851063829787,
"eval_cer": 0.32859195634273536,
"eval_loss": 0.634281575679779,
"eval_runtime": 232.2994,
"eval_samples_per_second": 21.356,
"eval_steps_per_second": 2.673,
"eval_wer": 1.0,
"step": 2700
},
{
"epoch": 7.446808510638298,
"eval_cer": 0.3260086862457013,
"eval_loss": 0.6208234429359436,
"eval_runtime": 235.1613,
"eval_samples_per_second": 21.096,
"eval_steps_per_second": 2.641,
"eval_wer": 0.9997984277363435,
"step": 2800
},
{
"epoch": 7.712765957446808,
"eval_cer": 0.3287291925666403,
"eval_loss": 0.6095162034034729,
"eval_runtime": 241.6677,
"eval_samples_per_second": 20.528,
"eval_steps_per_second": 2.57,
"eval_wer": 0.9997984277363435,
"step": 2900
},
{
"epoch": 7.9787234042553195,
"grad_norm": 1.701768159866333,
"learning_rate": 0.0001543190661478599,
"loss": 0.3146,
"step": 3000
},
{
"epoch": 7.9787234042553195,
"eval_cer": 0.3265818492984807,
"eval_loss": 0.6058351397514343,
"eval_runtime": 235.659,
"eval_samples_per_second": 21.052,
"eval_steps_per_second": 2.635,
"eval_wer": 0.999596855472687,
"step": 3000
},
{
"epoch": 8.24468085106383,
"eval_cer": 0.3251368325879523,
"eval_loss": 0.6612707376480103,
"eval_runtime": 233.8878,
"eval_samples_per_second": 21.211,
"eval_steps_per_second": 2.655,
"eval_wer": 0.999596855472687,
"step": 3100
},
{
"epoch": 8.51063829787234,
"eval_cer": 0.3244022151541082,
"eval_loss": 0.6538846492767334,
"eval_runtime": 235.5702,
"eval_samples_per_second": 21.06,
"eval_steps_per_second": 2.636,
"eval_wer": 1.0,
"step": 3200
},
{
"epoch": 8.77659574468085,
"eval_cer": 0.32638810404120316,
"eval_loss": 0.6331284046173096,
"eval_runtime": 236.8675,
"eval_samples_per_second": 20.944,
"eval_steps_per_second": 2.622,
"eval_wer": 1.0,
"step": 3300
},
{
"epoch": 9.042553191489361,
"eval_cer": 0.32281996221967485,
"eval_loss": 0.6436325907707214,
"eval_runtime": 236.1998,
"eval_samples_per_second": 21.003,
"eval_steps_per_second": 2.629,
"eval_wer": 1.0,
"step": 3400
},
{
"epoch": 9.308510638297872,
"grad_norm": 1.067522406578064,
"learning_rate": 0.000125136186770428,
"loss": 0.2576,
"step": 3500
},
{
"epoch": 9.308510638297872,
"eval_cer": 0.32351421605825276,
"eval_loss": 0.6328682899475098,
"eval_runtime": 234.2706,
"eval_samples_per_second": 21.176,
"eval_steps_per_second": 2.651,
"eval_wer": 1.0,
"step": 3500
},
{
"epoch": 9.574468085106384,
"eval_cer": 0.3196958199460742,
"eval_loss": 0.6314510703086853,
"eval_runtime": 234.3665,
"eval_samples_per_second": 21.168,
"eval_steps_per_second": 2.65,
"eval_wer": 0.9997984277363435,
"step": 3600
},
{
"epoch": 9.840425531914894,
"eval_cer": 0.32033356475127955,
"eval_loss": 0.6280702352523804,
"eval_runtime": 236.442,
"eval_samples_per_second": 20.982,
"eval_steps_per_second": 2.626,
"eval_wer": 0.9997984277363435,
"step": 3700
},
{
"epoch": 10.106382978723405,
"eval_cer": 0.3196473836317548,
"eval_loss": 0.6696433424949646,
"eval_runtime": 232.6849,
"eval_samples_per_second": 21.321,
"eval_steps_per_second": 2.669,
"eval_wer": 0.999596855472687,
"step": 3800
},
{
"epoch": 10.372340425531915,
"eval_cer": 0.3199218560795647,
"eval_loss": 0.6629988551139832,
"eval_runtime": 233.1379,
"eval_samples_per_second": 21.279,
"eval_steps_per_second": 2.664,
"eval_wer": 0.999596855472687,
"step": 3900
},
{
"epoch": 10.638297872340425,
"grad_norm": 1.6601381301879883,
"learning_rate": 9.595330739299609e-05,
"loss": 0.2201,
"step": 4000
},
{
"epoch": 10.638297872340425,
"eval_cer": 0.3203093465941198,
"eval_loss": 0.6781216859817505,
"eval_runtime": 232.2432,
"eval_samples_per_second": 21.361,
"eval_steps_per_second": 2.674,
"eval_wer": 1.0,
"step": 4000
},
{
"epoch": 10.904255319148936,
"eval_cer": 0.31962316547459513,
"eval_loss": 0.6531046628952026,
"eval_runtime": 239.9955,
"eval_samples_per_second": 20.671,
"eval_steps_per_second": 2.588,
"eval_wer": 1.0,
"step": 4100
},
{
"epoch": 11.170212765957446,
"eval_cer": 0.3192598931171997,
"eval_loss": 0.6762946248054504,
"eval_runtime": 235.4369,
"eval_samples_per_second": 21.071,
"eval_steps_per_second": 2.638,
"eval_wer": 0.9997984277363435,
"step": 4200
},
{
"epoch": 11.436170212765958,
"eval_cer": 0.3184364757737701,
"eval_loss": 0.6785323023796082,
"eval_runtime": 232.8132,
"eval_samples_per_second": 21.309,
"eval_steps_per_second": 2.667,
"eval_wer": 1.0,
"step": 4300
},
{
"epoch": 11.702127659574469,
"eval_cer": 0.31793596719246975,
"eval_loss": 0.666408360004425,
"eval_runtime": 232.0593,
"eval_samples_per_second": 21.378,
"eval_steps_per_second": 2.676,
"eval_wer": 0.9997984277363435,
"step": 4400
},
{
"epoch": 11.96808510638298,
"grad_norm": 0.7692495584487915,
"learning_rate": 6.67704280155642e-05,
"loss": 0.1931,
"step": 4500
},
{
"epoch": 11.96808510638298,
"eval_cer": 0.31842033033566364,
"eval_loss": 0.6682071089744568,
"eval_runtime": 235.6644,
"eval_samples_per_second": 21.051,
"eval_steps_per_second": 2.635,
"eval_wer": 0.9997984277363435,
"step": 4500
},
{
"epoch": 12.23404255319149,
"eval_cer": 0.3168219319631238,
"eval_loss": 0.6799555420875549,
"eval_runtime": 235.3483,
"eval_samples_per_second": 21.079,
"eval_steps_per_second": 2.639,
"eval_wer": 0.9997984277363435,
"step": 4600
},
{
"epoch": 12.5,
"eval_cer": 0.3162245507531847,
"eval_loss": 0.6925452351570129,
"eval_runtime": 235.4523,
"eval_samples_per_second": 21.07,
"eval_steps_per_second": 2.637,
"eval_wer": 1.0,
"step": 4700
},
{
"epoch": 12.76595744680851,
"eval_cer": 0.3144646979995802,
"eval_loss": 0.7046905159950256,
"eval_runtime": 235.1343,
"eval_samples_per_second": 21.099,
"eval_steps_per_second": 2.641,
"eval_wer": 1.0,
"step": 4800
},
{
"epoch": 13.03191489361702,
"eval_cer": 0.3147230250092836,
"eval_loss": 0.6918711066246033,
"eval_runtime": 238.6326,
"eval_samples_per_second": 20.789,
"eval_steps_per_second": 2.602,
"eval_wer": 0.9997984277363435,
"step": 4900
},
{
"epoch": 13.297872340425531,
"grad_norm": 1.052759051322937,
"learning_rate": 3.758754863813229e-05,
"loss": 0.1694,
"step": 5000
},
{
"epoch": 13.297872340425531,
"eval_cer": 0.31423866186608973,
"eval_loss": 0.699897289276123,
"eval_runtime": 256.2343,
"eval_samples_per_second": 19.361,
"eval_steps_per_second": 2.424,
"eval_wer": 0.9997984277363435,
"step": 5000
},
{
"epoch": 13.563829787234042,
"eval_cer": 0.3134394626798198,
"eval_loss": 0.6994884610176086,
"eval_runtime": 236.0872,
"eval_samples_per_second": 21.013,
"eval_steps_per_second": 2.63,
"eval_wer": 1.0,
"step": 5100
},
{
"epoch": 13.829787234042554,
"eval_cer": 0.3133829536464472,
"eval_loss": 0.6916852593421936,
"eval_runtime": 234.7745,
"eval_samples_per_second": 21.131,
"eval_steps_per_second": 2.645,
"eval_wer": 0.9997984277363435,
"step": 5200
},
{
"epoch": 14.095744680851064,
"eval_cer": 0.3128501541889339,
"eval_loss": 0.6962644457817078,
"eval_runtime": 235.7153,
"eval_samples_per_second": 21.047,
"eval_steps_per_second": 2.635,
"eval_wer": 0.9997984277363435,
"step": 5300
},
{
"epoch": 14.361702127659575,
"eval_cer": 0.3128259360317742,
"eval_loss": 0.6961241960525513,
"eval_runtime": 245.639,
"eval_samples_per_second": 20.196,
"eval_steps_per_second": 2.528,
"eval_wer": 0.9997984277363435,
"step": 5400
},
{
"epoch": 14.627659574468085,
"grad_norm": 2.195582389831543,
"learning_rate": 8.404669260700388e-06,
"loss": 0.1548,
"step": 5500
},
{
"epoch": 14.627659574468085,
"eval_cer": 0.31290666322230654,
"eval_loss": 0.6963828802108765,
"eval_runtime": 237.6797,
"eval_samples_per_second": 20.873,
"eval_steps_per_second": 2.613,
"eval_wer": 1.0,
"step": 5500
},
{
"epoch": 14.893617021276595,
"eval_cer": 0.3126725543697628,
"eval_loss": 0.6983669400215149,
"eval_runtime": 245.8138,
"eval_samples_per_second": 20.182,
"eval_steps_per_second": 2.526,
"eval_wer": 0.9997984277363435,
"step": 5600
},
{
"epoch": 15.0,
"step": 5640,
"total_flos": 2.6348448154740388e+19,
"train_loss": 0.9431738045198698,
"train_runtime": 36296.3273,
"train_samples_per_second": 4.972,
"train_steps_per_second": 0.155
}
],
"logging_steps": 500,
"max_steps": 5640,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.6348448154740388e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}