---
# Configuration entry named `sentencepiece_unigram_tokenizer`, declared with
# config_type `preprocessor`. One mapping entry per line, block style.
name: sentencepiece_unigram_tokenizer
config_type: preprocessor

# Sequence length, truncation and padding settings.
max_length: 512
truncation: longest_first
truncation_side: right
stride: 0
padding: longest
padding_side: right
# NOTE(review): 0 presumably means "no multiple-of constraint" - confirm
# against the consumer of this file.
pad_to_multiple_of: 0
pad_token_type_id: 0

# Special tokens. No value is given for any of them, so each is written as an
# explicit null (what a bare `key:` parses to).
# NOTE(review): the intended token strings may have been lost before this file
# reached its current form (angle-bracket tokens are often stripped by
# markup-aware tooling) - confirm the expected values with the consumer.
bos_token: null
eos_token: null
unk_token: null
sep_token: null
pad_token: null
cls_token: null
mask_token: null

# Sub-word and whitespace handling.
continuing_subword_prefix: ''
# NOTE(review): this is an ASCII underscore. SentencePiece-style tokenizers
# commonly use U+2581 as the word-boundary marker - confirm '_' is intended.
replacement: '_'
add_prefix_space: true
end_of_word_suffix: ''
fuse_unk: false

# Vocabulary and alphabet settings.
vocab_size: 32103
min_frequency: 2
limit_alphabet: 1000
initial_alphabet: []
show_progress: true