|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
token_type: unigram |
|
output_folder: results/unigram |
|
train_log: results/unigram/train_log.txt |
|
|
|
|
|
data_folder: ../CommonVoice/ |
|
csv_dir: ../cv_de_acc |
|
train_tsv_file: ../CommonVoice//train.tsv |
|
dev_tsv_file: ../CommonVoice//dev.tsv |
|
test_tsv_file: ../CommonVoice//test.tsv |
|
accented_letters: true |
|
language: de |
|
skip_prep: false |
|
|
|
|
|
|
|
|
|
train_csv: ../cv_de_acc/train.csv |
|
valid_csv: ../cv_de_acc/dev.csv |
|
|
|
|
|
token_output: 5000 |
|
character_coverage: 1.0 |
|
csv_read: wrd |
|
|
|
|
|
tokenizer: !name:speechbrain.tokenizers.SentencePiece.SentencePiece |
|
  model_dir: results/unigram |
|
  vocab_size: 5000 |
|
  annotation_train: ../cv_de_acc/train.csv |
|
  annotation_read: wrd |
|
  model_type: unigram |
|
  character_coverage: 1.0 |
|
  annotation_list_to_check: [../cv_de_acc/train.csv, ../cv_de_acc/dev.csv] |
|
|