---
# Preprocessor configuration for the `bpe_tokenizer` component.
# NOTE(review): key names look like they mirror the Hugging Face `tokenizers`
# BPE model / BpeTrainer options — confirm against the consuming code.
name: bpe_tokenizer
config_type: preprocessor

# Neither truncation nor padding is requested.
truncation_strategy: no_truncation
padding_strategy: no_padding

# Empty strings: no sub-word prefix and no end-of-word suffix are configured.
continuing_subword_prefix: ''
end_of_word_suffix: ''
fuse_unk: false

# Training-time settings.
# NOTE(review): the keys below are nested under `train_config` because
# `name` / `config_type` would otherwise be duplicate top-level keys
# (invalid YAML, silently last-wins in most parsers). Confirm this nesting
# against the consumer's schema.
train_config:
  name: bpe_tokenizer
  config_type: preprocessor
  vocab_size: 30000
  min_frequency: 2
  limit_alphabet: 1000
  show_progress: true