File size: 366 Bytes
1c844b4
40b30cb
bfe6cab
 
eaaad3f
90450d3
 
 
 
40b30cb
90450d3
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Configuration entry `wordpiece_tokenizer` (config_type: preprocessor).
# All keys live in one flat top-level mapping; the grouping below is purely
# visual and does not change the parsed result.

# --- Identity ---------------------------------------------------------------
name: wordpiece_tokenizer
config_type: preprocessor

# --- Truncation -------------------------------------------------------------
truncation_side: right
stride: 0

# --- Padding ----------------------------------------------------------------
padding_side: right
# NOTE(review): 0 presumably means "no multiple-of constraint" -- confirm how
# the consuming code interprets a zero here.
pad_to_multiple_of: 0
pad_token_type_id: 0

# --- Special tokens ---------------------------------------------------------
# Quoted on purpose: a plain scalar starting with `[` would be parsed as a
# flow sequence, and one starting with `#` would be parsed as a comment.
cls_token: '[CLS]'
sep_token: '[SEP]'
pad_token: '[PAD]'
unk_token: '[UNK]'
mask_token: '[MASK]'
wordpieces_prefix: '##'

# --- Vocabulary settings ----------------------------------------------------
# NOTE(review): these key names look like WordPiece trainer options from the
# Hugging Face `tokenizers` library -- verify against the code that loads
# this file.
vocab_size: 30000
min_frequency: 2
limit_alphabet: 1000
initial_alphabet: []
show_progress: true