document_1 / tokenizer_config.json
Pclanglais's picture
Upload folder using huggingface_hub
8f16342 verified
{
"added_tokens_decoder": {
"0": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<|begin_of_text|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<|end_of_text|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65514": {
"content": "<|language|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65515": {
"content": "<|synopsis|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65516": {
"content": "<|keywords|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65517": {
"content": "<|title_english|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65518": {
"content": "<|title_original|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65519": {
"content": "<|format|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65520": {
"content": "<|audience|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65521": {
"content": "<|discipline|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65522": {
"content": "<|sentiment|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65523": {
"content": "<|brand|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65524": {
"content": "<|time|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65525": {
"content": "<|location|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65526": {
"content": "<|country|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65527": {
"content": "<|protagonist|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65528": {
"content": "<|other_character|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65529": {
"content": "<|bibliography|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65530": {
"content": "<|quote|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65531": {
"content": "<|link|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65532": {
"content": "<|text_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65533": {
"content": "<|text_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65534": {
"content": "<|annotation_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65535": {
"content": "<|annotation_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<|language|>",
"<|synopsis|>",
"<|keywords|>",
"<|title_english|>",
"<|title_original|>",
"<|format|>",
"<|audience|>",
"<|discipline|>",
"<|sentiment|>",
"<|brand|>",
"<|time|>",
"<|location|>",
"<|country|>",
"<|protagonist|>",
"<|other_character|>",
"<|bibliography|>",
"<|quote|>",
"<|link|>",
"<|text_start|>",
"<|text_end|>",
"<|annotation_start|>",
"<|annotation_end|>"
],
"clean_up_tokenization_spaces": true,
"model_max_length": 1000000000000000019884624838656,
"tokenizer_class": "PreTrainedTokenizerFast"
}