SELFIESTokenizer / tokenizer_config.json
michaelsyao's picture
Upload tokenizer
f95a804 verified
raw
history blame
13.2 kB
{
"added_tokens_decoder": {
"0": {
"content": "<start>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<stop>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "[#C-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"3": {
"content": "[#C]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"4": {
"content": "[#N+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"5": {
"content": "[#N]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"6": {
"content": "[#S]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"7": {
"content": "[=B-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"8": {
"content": "[=Branch1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"9": {
"content": "[=Branch2]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10": {
"content": "[=C]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"11": {
"content": "[=N+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"12": {
"content": "[=N-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"13": {
"content": "[=NH1+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"14": {
"content": "[=NH2+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"15": {
"content": "[=N]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"16": {
"content": "[=O+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"17": {
"content": "[=OH1+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"18": {
"content": "[=O]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"19": {
"content": "[=PH1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"20": {
"content": "[=P]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"21": {
"content": "[=Ring1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"22": {
"content": "[=Ring2]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"23": {
"content": "[=S+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"24": {
"content": "[=SH1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"25": {
"content": "[=S]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"26": {
"content": "[=Se+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"27": {
"content": "[=Se]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"28": {
"content": "[=Si]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"29": {
"content": "[B-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30": {
"content": "[BH1-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"31": {
"content": "[BH2-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32": {
"content": "[BH3-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"33": {
"content": "[B]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34": {
"content": "[Br]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"35": {
"content": "[Branch1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"36": {
"content": "[Branch2]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"37": {
"content": "[C+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"38": {
"content": "[C-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39": {
"content": "[CH1-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"40": {
"content": "[C]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"41": {
"content": "[Cl]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"42": {
"content": "[F]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"43": {
"content": "[H]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"44": {
"content": "[I+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"45": {
"content": "[I-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46": {
"content": "[I]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"47": {
"content": "[N+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"48": {
"content": "[N-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"49": {
"content": "[NH1+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50": {
"content": "[NH1-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"51": {
"content": "[NH1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"52": {
"content": "[NH2+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"53": {
"content": "[NH3+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"54": {
"content": "[N]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"55": {
"content": "[O+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"56": {
"content": "[O-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57": {
"content": "[OH0]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"58": {
"content": "[OH1+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"59": {
"content": "[O]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"60": {
"content": "[P+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"61": {
"content": "[PH1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"62": {
"content": "[P]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"63": {
"content": "[Ring1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"64": {
"content": "[Ring2]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65": {
"content": "[S+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"66": {
"content": "[S-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"67": {
"content": "[SH1+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"68": {
"content": "[SH1-1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"69": {
"content": "[SH1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"70": {
"content": "[S]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"71": {
"content": "[Se+1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"72": {
"content": "[SeH1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"73": {
"content": "[Se]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"74": {
"content": "[SiH1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"75": {
"content": "[SiH2]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"76": {
"content": "[Si]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<start>",
"clean_up_tokenization_spaces": false,
"eos_token": "<stop>",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<stop>",
"tokenizer_class": "SELFIESTokenizer"
}