Update tokenizer_config.json
Browse files
It seems `eos_token_id` is `<|end|>` (32007) instead of `<|endoftext|>` (32000).
Context: https://twitter.com/altryne/status/1783567596467491109?t=k5HHVmTCGDt4-TkXF8KyNw&s=19
- tokenizer_config.json +1 -1
tokenizer_config.json
CHANGED
@@ -118,7 +118,7 @@
|
|
118 |
"bos_token": "<s>",
|
119 |
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|system|>' + '\n' + message['content'] + '<|end|>' + '\n'}}{% elif (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif message['role'] == 'assistant' %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
|
120 |
"clean_up_tokenization_spaces": false,
|
121 |
-
"eos_token": "<|endoftext|>",
|
122 |
"legacy": false,
|
123 |
"model_max_length": 131072,
|
124 |
"pad_token": "<|endoftext|>",
|
|
|
118 |
"bos_token": "<s>",
|
119 |
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|system|>' + '\n' + message['content'] + '<|end|>' + '\n'}}{% elif (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif message['role'] == 'assistant' %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
|
120 |
"clean_up_tokenization_spaces": false,
|
121 |
+
"eos_token": "<|end|>",
|
122 |
"legacy": false,
|
123 |
"model_max_length": 131072,
|
124 |
"pad_token": "<|endoftext|>",
|