SyedAunZaidi commited on
Commit
6c1bdce
1 Parent(s): f49cdaa

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +2 -2
  2. vocab.json +79 -52
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 54,
3
- "<s>": 53
4
  }
 
1
  {
2
+ "</s>": 81,
3
+ "<s>": 80
4
  }
vocab.json CHANGED
@@ -1,55 +1,82 @@
1
  {
2
- "[PAD]": 52,
3
- "[UNK]": 51,
4
  "|": 0,
5
- "آ": 1,
6
- "ؤ": 2,
7
- "ئ": 3,
8
- "ا": 4,
9
- "ب": 5,
10
- "ت": 6,
11
- "ث": 7,
12
- "ج": 8,
13
- "ح": 9,
14
- "خ": 10,
15
- "د": 11,
16
- "ذ": 12,
17
- "ر": 13,
18
- "ز": 14,
19
- "س": 15,
20
- "ش": 16,
21
- "ص": 17,
22
- "ض": 18,
23
- "ط": 19,
24
- "ظ": 20,
25
- "ع": 21,
26
- "غ": 22,
27
- "ف": 23,
28
- "ق": 24,
29
- "ل": 25,
30
- "م": 26,
31
- "ن": 27,
32
- "و": 28,
33
- "ى": 29,
34
- "َ": 30,
35
- "ُ": 31,
36
- "ِ": 32,
37
- "ْ": 33,
38
- "ٔ": 34,
39
- "ٰ": 35,
40
- "ٹ": 36,
41
- "پ": 37,
42
- "چ": 38,
43
- "ڈ": 39,
44
- "ڑ": 40,
45
- "ژ": 41,
46
- "ک": 42,
47
- "گ": 43,
48
- "ں": 44,
49
- "ھ": 45,
50
- "ہ": 46,
51
- "ۃ": 47,
52
- "ی": 48,
53
- "ے": 49,
54
- "": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  }
 
1
  {
2
+ "[PAD]": 79,
3
+ "[UNK]": 78,
4
  "|": 0,
5
+ "ؑ": 1,
6
+ "ؓ": 2,
7
+ "ؔ": 3,
8
+ "ء": 4,
9
+ "آ": 5,
10
+ "أ": 6,
11
+ "ؤ": 7,
12
+ "ئ": 8,
13
+ "ا": 9,
14
+ "ب": 10,
15
+ "ت": 11,
16
+ "ث": 12,
17
+ "ج": 13,
18
+ "ح": 14,
19
+ "خ": 15,
20
+ "د": 16,
21
+ "ذ": 17,
22
+ "ر": 18,
23
+ "ز": 19,
24
+ "س": 20,
25
+ "ش": 21,
26
+ "ص": 22,
27
+ "ض": 23,
28
+ "ط": 24,
29
+ "ظ": 25,
30
+ "ع": 26,
31
+ "غ": 27,
32
+ "ف": 28,
33
+ "ق": 29,
34
+ "ك": 30,
35
+ "ل": 31,
36
+ "م": 32,
37
+ "ن": 33,
38
+ "ه": 34,
39
+ "و": 35,
40
+ "ى": 36,
41
+ "ً": 37,
42
+ "َ": 38,
43
+ "ُ": 39,
44
+ "ِ": 40,
45
+ "ّ": 41,
46
+ "ْ": 42,
47
+ "ٓ": 43,
48
+ "ٔ": 44,
49
+ "ٰ": 45,
50
+ "ٹ": 46,
51
+ "پ": 47,
52
+ "چ": 48,
53
+ "ڈ": 49,
54
+ "ڑ": 50,
55
+ "ژ": 51,
56
+ "ک": 52,
57
+ "گ": 53,
58
+ "ں": 54,
59
+ "ھ": 55,
60
+ "ہ": 56,
61
+ "ۂ": 57,
62
+ "ۃ": 58,
63
+ "ی": 59,
64
+ "ے": 60,
65
+ "ۓ": 61,
66
+ "‘": 62,
67
+ "’": 63,
68
+ "“": 64,
69
+ "”": 65,
70
+ "…": 66,
71
+ "ﭨ": 67,
72
+ "ﮭ": 68,
73
+ "ﮯ": 69,
74
+ "ﯾ": 70,
75
+ "ﷲ": 71,
76
+ "ﷺ": 72,
77
+ "ﺘ": 73,
78
+ "ﺩ": 74,
79
+ "ﺲ": 75,
80
+ "ﻧ": 76,
81
+ "ﻮ": 77
82
  }