comx committed on
Commit 9a7e26e · verified · 1 Parent(s): de06080

Upload tokenizer

Files changed (3)
  1. merges.txt +0 -0
  2. tokenizer_config.json +3 -2
  3. vocab.json +0 -0
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -2,7 +2,7 @@
   "add_bos_token": false,
   "add_prefix_space": false,
   "added_tokens_decoder": {
-    "50256": {
+    "0": {
       "content": "<|endoftext|>",
       "lstrip": false,
       "normalized": true,
@@ -15,7 +15,8 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "errors": "replace",
-  "model_max_length": 1024,
+  "max_len": 512,
+  "model_max_length": 512,
   "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
vocab.json CHANGED
The diff for this file is too large to render. See raw diff
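
The tokenizer_config.json change above remaps the <|endoftext|> special token from id 50256 to id 0 and halves the context limit from 1024 to 512 tokens (adding the legacy max_len key alongside model_max_length). A minimal sketch of how to check the new settings after pulling this commit; "comx/tokenizer" is a hypothetical placeholder for the actual repo id:

from transformers import AutoTokenizer

# "comx/tokenizer" is a placeholder repo id; substitute the real repository.
# revision pins the download to this commit.
tok = AutoTokenizer.from_pretrained("comx/tokenizer", revision="9a7e26e")

print(tok.model_max_length)              # expected: 512 after this commit
print(tok.eos_token, tok.eos_token_id)   # <|endoftext|>, now mapped to id 0

# Inputs longer than 512 tokens are clipped when truncation is enabled.
ids = tok("some very long text " * 400, truncation=True)["input_ids"]
print(len(ids))  # <= 512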