Reihaneh commited on
Commit
59412c7
·
verified ·
1 Parent(s): 2586ef0

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +98 -98
vocab.json CHANGED
@@ -1,105 +1,105 @@
1
  {
2
- "$": 78,
3
- "&": 76,
4
- "'": 21,
5
- "(": 43,
6
- "-": 86,
7
- "F": 37,
8
- "L": 90,
9
- "N": 85,
10
- "R": 1,
11
- "Y": 19,
12
- "[": 75,
13
  "[FR]": 102,
14
  "[FY-NL]": 101,
15
  "[PAD]": 100,
16
  "[UNK]": 99,
17
- "]": 3,
18
- "a": 97,
19
- "b": 38,
20
- "c": 82,
21
- "d": 17,
22
- "e": 18,
23
- "f": 61,
24
- "g": 92,
25
- "h": 41,
26
- "i": 35,
27
- "j": 12,
28
- "k": 93,
29
- "l": 96,
30
- "m": 65,
31
- "n": 32,
32
- "o": 39,
33
- "p": 25,
34
- "q": 49,
35
- "r": 42,
36
- "s": 59,
37
- "t": 27,
38
- "u": 88,
39
- "v": 80,
40
- "w": 84,
41
- "x": 63,
42
- "y": 69,
43
- "z": 74,
44
- "|": 53,
45
- "~": 66,
46
- "«": 40,
47
- "´": 68,
48
- "»": 55,
49
- "à": 58,
50
- "á": 9,
51
- "â": 24,
52
- "ã": 8,
53
- "ä": 23,
54
- "å": 98,
55
- "æ": 67,
56
- "ç": 45,
57
- "è": 30,
58
- "é": 33,
59
- "ê": 31,
60
- "ë": 94,
61
- "í": 51,
62
- "î": 71,
63
- "ï": 91,
64
- "ñ": 7,
65
- "ó": 54,
66
- "ô": 36,
67
- "ö": 77,
68
- "ø": 4,
69
- "ù": 62,
70
- "ú": 48,
71
- "û": 60,
72
- "ü": 22,
73
- "ā": 34,
74
- "ă": 26,
75
- "ć": 16,
76
- "č": 57,
77
- "ę": 15,
78
- "ĝ": 28,
79
  "ł": 56,
80
- "ō": 14,
81
- "ő": 70,
82
- "œ": 20,
83
- "ś": 11,
84
- "ş": 81,
85
- "š": 72,
86
- "ū": 10,
87
- "ż": 2,
88
- "ž": 46,
89
- "ș": 50,
90
- "ʿ": 52,
91
- "ː": 89,
92
- "̀": 95,
93
- "́": 64,
94
- "̂": 29,
95
- "̧": 5,
96
- "–": 0,
97
- "—": 73,
98
- "‘": 83,
99
- "’": 87,
100
- "…": 44,
101
- "ℤ": 6,
102
- "保": 79,
103
- "宇": 47,
104
- "津": 13
105
  }
 
1
  {
2
+ "$": 98,
3
+ "&": 51,
4
+ "'": 31,
5
+ "(": 3,
6
+ "-": 82,
7
+ "F": 45,
8
+ "L": 11,
9
+ "N": 20,
10
+ "R": 22,
11
+ "Y": 83,
12
+ "[": 72,
13
  "[FR]": 102,
14
  "[FY-NL]": 101,
15
  "[PAD]": 100,
16
  "[UNK]": 99,
17
+ "]": 21,
18
+ "a": 76,
19
+ "b": 88,
20
+ "c": 12,
21
+ "d": 70,
22
+ "e": 35,
23
+ "f": 80,
24
+ "g": 64,
25
+ "h": 74,
26
+ "i": 47,
27
+ "j": 13,
28
+ "k": 55,
29
+ "l": 91,
30
+ "m": 66,
31
+ "n": 30,
32
+ "o": 8,
33
+ "p": 59,
34
+ "q": 90,
35
+ "r": 37,
36
+ "s": 44,
37
+ "t": 77,
38
+ "u": 68,
39
+ "v": 26,
40
+ "w": 69,
41
+ "x": 5,
42
+ "y": 25,
43
+ "z": 6,
44
+ "|": 95,
45
+ "~": 52,
46
+ "«": 42,
47
+ "´": 10,
48
+ "»": 32,
49
+ "à": 33,
50
+ "á": 46,
51
+ "â": 86,
52
+ "ã": 84,
53
+ "ä": 9,
54
+ "å": 39,
55
+ "æ": 57,
56
+ "ç": 23,
57
+ "è": 7,
58
+ "é": 18,
59
+ "ê": 15,
60
+ "ë": 14,
61
+ "í": 0,
62
+ "î": 60,
63
+ "ï": 62,
64
+ "ñ": 4,
65
+ "ó": 40,
66
+ "ô": 65,
67
+ "ö": 63,
68
+ "ø": 24,
69
+ "ù": 71,
70
+ "ú": 93,
71
+ "û": 79,
72
+ "ü": 43,
73
+ "ā": 94,
74
+ "ă": 1,
75
+ "ć": 34,
76
+ "č": 27,
77
+ "ę": 92,
78
+ "ĝ": 48,
79
  "ł": 56,
80
+ "ō": 81,
81
+ "ő": 85,
82
+ "œ": 16,
83
+ "ś": 49,
84
+ "ş": 73,
85
+ "š": 58,
86
+ "ū": 41,
87
+ "ż": 50,
88
+ "ž": 67,
89
+ "ș": 29,
90
+ "ʿ": 89,
91
+ "ː": 36,
92
+ "̀": 61,
93
+ "́": 96,
94
+ "̂": 87,
95
+ "̧": 28,
96
+ "–": 19,
97
+ "—": 17,
98
+ "‘": 78,
99
+ "’": 97,
100
+ "…": 38,
101
+ "ℤ": 53,
102
+ "保": 75,
103
+ "宇": 54,
104
+ "津": 2
105
  }