Pclanglais committed on
Commit
5fc8f56
·
verified ·
1 Parent(s): 291e715

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +81 -1
  2. tokenizer.json +100 -0
  3. tokenizer_config.json +102 -0
special_tokens_map.json CHANGED
@@ -1 +1,81 @@
1
- {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|source_id|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|source_analysis_start|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<|source_analysis_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<|source_start|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<|source_end|>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<|answer_start|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<|answer_end|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<|query_start|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<|query_end|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "<|source_interpretation_start|>",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<|source_interpretation_end|>",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ }
80
+ ]
81
+ }
tokenizer.json CHANGED
@@ -38,6 +38,105 @@
38
  "rstrip": false,
39
  "normalized": false,
40
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  }
42
  ],
43
  "normalizer": null,
@@ -60,6 +159,7 @@
60
  }
61
  ]
62
  },
 
63
  "decoder": {
64
  "type": "ByteLevel",
65
  "add_prefix_space": true,
 
38
  "rstrip": false,
39
  "normalized": false,
40
  "special": true
41
+ },
42
+ {
43
+ "id": 65525,
44
+ "content": "<|source_id|>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 65526,
53
+ "content": "<|source_analysis_start|>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 65527,
62
+ "content": "<|source_analysis_end|>",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
+ },
69
+ {
70
+ "id": 65528,
71
+ "content": "<|source_start|>",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 65529,
80
+ "content": "<|source_end|>",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 65530,
89
+ "content": "<|answer_start|>",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 65531,
98
+ "content": "<|answer_end|>",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 65532,
107
+ "content": "<|query_start|>",
108
+ "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
+ },
114
+ {
115
+ "id": 65533,
116
+ "content": "<|query_end|>",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
+ },
123
+ {
124
+ "id": 65534,
125
+ "content": "<|source_interpretation_start|>",
126
+ "single_word": false,
127
+ "lstrip": false,
128
+ "rstrip": false,
129
+ "normalized": false,
130
+ "special": true
131
+ },
132
+ {
133
+ "id": 65535,
134
+ "content": "<|source_interpretation_end|>",
135
+ "single_word": false,
136
+ "lstrip": false,
137
+ "rstrip": false,
138
+ "normalized": false,
139
+ "special": true
140
  }
141
  ],
142
  "normalizer": null,
 
159
  }
160
  ]
161
  },
162
+ "post_processor": null,
163
  "decoder": {
164
  "type": "ByteLevel",
165
  "add_prefix_space": true,
tokenizer_config.json CHANGED
@@ -31,9 +31,111 @@
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  "clean_up_tokenization_spaces": true,
 
37
  "model_max_length": 1000000000000000019884624838656,
38
  "tokenizer_class": "PreTrainedTokenizerFast"
39
  }
 
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
+ },
35
+ "65525": {
36
+ "content": "<|source_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "65526": {
44
+ "content": "<|source_analysis_start|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "65527": {
52
+ "content": "<|source_analysis_end|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "65528": {
60
+ "content": "<|source_start|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "65529": {
68
+ "content": "<|source_end|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "65530": {
76
+ "content": "<|answer_start|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "65531": {
84
+ "content": "<|answer_end|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "65532": {
92
+ "content": "<|query_start|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "65533": {
100
+ "content": "<|query_end|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "65534": {
108
+ "content": "<|source_interpretation_start|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "65535": {
116
+ "content": "<|source_interpretation_end|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
  }
123
  },
124
+ "additional_special_tokens": [
125
+ "<|source_id|>",
126
+ "<|source_analysis_start|>",
127
+ "<|source_analysis_end|>",
128
+ "<|source_start|>",
129
+ "<|source_end|>",
130
+ "<|answer_start|>",
131
+ "<|answer_end|>",
132
+ "<|query_start|>",
133
+ "<|query_end|>",
134
+ "<|source_interpretation_start|>",
135
+ "<|source_interpretation_end|>"
136
+ ],
137
  "clean_up_tokenization_spaces": true,
138
+ "extra_special_tokens": {},
139
  "model_max_length": 1000000000000000019884624838656,
140
  "tokenizer_class": "PreTrainedTokenizerFast"
141
  }