File size: 1,215 Bytes
8b54d62 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
{
"dataset_name": "mozilla-foundation/common_voice_13_0",
"model_name_or_path": "facebook/wav2vec2-large-xlsr-53",
"dataset_config_name": "eo",
"output_dir": "./wav2vec2-common_voice_13_0-eo-10",
"train_split_name": "train",
"eval_split_name": "validation",
"eval_metrics": ["cer", "wer"],
"overwrite_output_dir": false,
"preprocessing_num_workers": 1,
"num_train_epochs": 5,
"per_device_train_batch_size": 16,
"gradient_accumulation_steps": 2,
"gradient_checkpointing": true,
"learning_rate": 3e-5,
"warmup_steps": 500,
"evaluation_strategy": "steps",
"text_column_name": "sentence",
"length_column_name": "input_length",
"save_steps": 1000,
"eval_steps": 1000,
"layerdrop": 0.2,
"save_total_limit": 3,
"freeze_feature_encoder": true,
"chars_to_ignore": "-!\"'(),.:;=?_`¨«¸»ʼ‑–—‘’“”„…‹›♫?",
"chars_to_substitute": {
"przy": "pŝe",
"byn": "bin",
"cx": "ĉ",
"sx": "ŝ",
"fi": "fi",
"fl": "fl",
"ǔ": "ŭ",
"ñ": "nj",
"á": "a",
"é": "e",
"ü": "ŭ",
"y": "j",
"qu": "ku"
},
"fp16": true,
"group_by_length": true,
"push_to_hub": true,
"do_train": true,
"do_eval": true
}
|