kcz358 committed on
Commit
b721bd3
1 Parent(s): 63a991b

Upload tokenizer

Browse files
README.md CHANGED
@@ -1,6 +1,4 @@
1
  ---
2
- # For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
3
- # Doc / guide: https://huggingface.co/docs/hub/model-cards
4
  {}
5
  ---
6
 
 
1
  ---
 
 
2
  {}
3
  ---
4
 
added_tokens.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "<|endoftext|>": 151643,
3
  "<|im_end|>": 151645,
4
  "<|im_start|>": 151644
 
1
  {
2
+ "<image>": 151646,
3
  "<|endoftext|>": 151643,
4
  "<|im_end|>": 151645,
5
  "<|im_start|>": 151644
special_tokens_map.json CHANGED
@@ -3,6 +3,13 @@
3
  "<|im_start|>",
4
  "<|im_end|>"
5
  ],
 
 
 
 
 
 
 
6
  "eos_token": {
7
  "content": "<|im_end|>",
8
  "lstrip": false,
 
3
  "<|im_start|>",
4
  "<|im_end|>"
5
  ],
6
+ "bos_token": {
7
+ "content": "<|im_start|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
  "eos_token": {
14
  "content": "<|im_end|>",
15
  "lstrip": false,
tokenizer.json CHANGED
@@ -29,6 +29,15 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
@@ -73,6 +82,7 @@
73
  "end_of_word_suffix": "",
74
  "fuse_unk": false,
75
  "byte_fallback": false,
 
76
  "vocab": {
77
  "!": 0,
78
  "\"": 1,
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
+ },
33
+ {
34
+ "id": 151646,
35
+ "content": "<image>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
  }
42
  ],
43
  "normalizer": {
 
82
  "end_of_word_suffix": "",
83
  "fuse_unk": false,
84
  "byte_fallback": false,
85
+ "ignore_merges": false,
86
  "vocab": {
87
  "!": 0,
88
  "\"": 1,
tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "151643": {
5
  "content": "<|endoftext|>",
@@ -24,14 +23,22 @@
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
 
 
 
 
 
 
 
 
27
  }
28
  },
29
  "additional_special_tokens": [
30
  "<|im_start|>",
31
  "<|im_end|>"
32
  ],
33
- "bos_token": null,
34
- "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
  "clean_up_tokenization_spaces": false,
36
  "eos_token": "<|im_end|>",
37
  "errors": "replace",
 
1
  {
 
2
  "added_tokens_decoder": {
3
  "151643": {
4
  "content": "<|endoftext|>",
 
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
+ },
27
+ "151646": {
28
+ "content": "<image>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
  }
35
  },
36
  "additional_special_tokens": [
37
  "<|im_start|>",
38
  "<|im_end|>"
39
  ],
40
+ "bos_token": "<|im_start|>",
41
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
42
  "clean_up_tokenization_spaces": false,
43
  "eos_token": "<|im_end|>",
44
  "errors": "replace",