File size: 431 Bytes
889f931 9fe7716 681493a 594b1ec 889f931 e2aeff5 889f931 9fe7716 889f931 9fe7716 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# Identity of this configuration entry.
name: bpe_tokenizer
config_type: preprocessor

# Truncation settings.
# NOTE(review): the values look like Hugging Face `tokenizers` truncation
# options - confirm against the code that loads this file.
truncation_strategy: longest_first
truncation_direction: right

# Padding settings: strategy, side, and the pad token with its ids.
padding_strategy: longest
padding_direction: right
pad_token: '<pad>'
pad_token_id: 0
pad_token_type_id: 0

# BPE model options. The prefix and suffix are explicit empty strings
# (not null), so they stay quoted.
continuing_subword_prefix: ''
end_of_word_suffix: ''
fuse_unk: false
# Tokenizer training settings. The keys below must stay indented under
# `train_config`: at column 0 they would leave `train_config` null and
# repeat the top-level `name` / `config_type` keys (duplicate keys are
# invalid YAML and most parsers silently keep the last one).
# NOTE(review): key names resemble BPE trainer arguments (vocab_size,
# min_frequency, limit_alphabet, show_progress) - confirm with the consumer.
train_config:
  name: bpe_tokenizer
  config_type: preprocessor
  vocab_size: 30000
  min_frequency: 2
  limit_alphabet: 1000
  show_progress: true
|