AOLCDROM committed on
Commit
eae681c
1 Parent(s): dafc8b9

Upload tokenizer-latin-1.json

Browse files
Files changed (1) hide show
  1. tokenizers/tokenizer-latin-1.json +156 -0
tokenizers/tokenizer-latin-1.json ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[STOP]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[UNK]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[SPACE]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ }
33
+ ],
34
+ "normalizer": null,
35
+ "pre_tokenizer": {
36
+ "type": "Whitespace"
37
+ },
38
+ "post_processor": null,
39
+ "decoder": null,
40
+ "model": {
41
+ "type": "BPE",
42
+ "dropout": null,
43
+ "unk_token": "[UNK]",
44
+ "continuing_subword_prefix": null,
45
+ "end_of_word_suffix": null,
46
+ "fuse_unk": false,
47
+ "byte_fallback": false,
48
+ "vocab": {
49
+ "[STOP]": 0,
50
+ "[UNK]": 1,
51
+ "[SPACE]": 2,
52
+ "!":3,
53
+ "'":4,
54
+ "(":5,
55
+ ")":6,
56
+ ",":7,
57
+ "-":8,
58
+ ".":9,
59
+ "/":10,
60
+ ":":11,
61
+ ";":12,
62
+ "?":13,
63
+ "a":14,
64
+ "b":15,
65
+ "c":16,
66
+ "d":17,
67
+ "e":18,
68
+ "f":19,
69
+ "g":20,
70
+ "h":21,
71
+ "i":22,
72
+ "j":23,
73
+ "k":24,
74
+ "l":25,
75
+ "m":26,
76
+ "n":27,
77
+ "o":28,
78
+ "p":29,
79
+ "q":30,
80
+ "r":31,
81
+ "s":32,
82
+ "t":33,
83
+ "u":34,
84
+ "v":35,
85
+ "w":36,
86
+ "x":37,
87
+ "y":38,
88
+ "z":39,
89
+ "¿": 40,
90
+ "À": 41,
91
+ "Á": 42,
92
+ "Â": 43,
93
+ "Ã": 44,
94
+ "Ä": 45,
95
+ "Å": 46,
96
+ "Æ": 47,
97
+ "Ç": 48,
98
+ "È": 49,
99
+ "É": 50,
100
+ "Ê": 51,
101
+ "Ë": 52,
102
+ "Ì": 53,
103
+ "Í": 54,
104
+ "Î": 55,
105
+ "Ï": 56,
106
+ "Ð": 57,
107
+ "Ñ": 58,
108
+ "Ò": 59,
109
+ "Ó": 60,
110
+ "Ô": 61,
111
+ "Õ": 62,
112
+ "Ö": 63,
113
+ "Ø": 64,
114
+ "Ù": 65,
115
+ "Ú": 66,
116
+ "Û": 67,
117
+ "Ü": 68,
118
+ "Ý": 69,
119
+ "Þ": 70,
120
+ "ß": 71,
121
+ "à": 72,
122
+ "á": 73,
123
+ "â": 74,
124
+ "ã": 75,
125
+ "ä": 76,
126
+ "å": 77,
127
+ "æ": 78,
128
+ "ç": 79,
129
+ "è": 80,
130
+ "é": 81,
131
+ "ê": 82,
132
+ "ë": 83,
133
+ "ì": 84,
134
+ "í": 85,
135
+ "î": 86,
136
+ "ï": 87,
137
+ "ð": 88,
138
+ "ñ": 89,
139
+ "ò": 90,
140
+ "ó": 91,
141
+ "ô": 92,
142
+ "õ": 93,
143
+ "ö": 94,
144
+ "ø": 95,
145
+ "ù": 96,
146
+ "ú": 97,
147
+ "û": 98,
148
+ "ü": 99,
149
+ "ý": 100,
150
+ "þ": 101,
151
+ "ÿ": 102,
152
+ "¡": 103
153
+ },
154
+ "merges": []
155
+ }
156
+ }