SyedAunZaidi commited on
Commit
1e0279b
1 Parent(s): 6c1bdce

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +2 -2
  2. vocab.json +81 -79
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 81,
3
- "<s>": 80
4
  }
 
1
  {
2
+ "</s>": 83,
3
+ "<s>": 82
4
  }
vocab.json CHANGED
@@ -1,82 +1,84 @@
1
  {
2
- "[PAD]": 79,
3
- "[UNK]": 78,
 
4
  "|": 0,
5
- "ؑ": 1,
6
- "ؓ": 2,
7
- "ؔ": 3,
8
- "ء": 4,
9
- "آ": 5,
10
- "أ": 6,
11
- "ؤ": 7,
12
- "ئ": 8,
13
- "ا": 9,
14
- "ب": 10,
15
- "ت": 11,
16
- "ث": 12,
17
- "ج": 13,
18
- "ح": 14,
19
- "خ": 15,
20
- "د": 16,
21
- "ذ": 17,
22
- "ر": 18,
23
- "ز": 19,
24
- "س": 20,
25
- "ش": 21,
26
- "ص": 22,
27
- "ض": 23,
28
- "ط": 24,
29
- "ظ": 25,
30
- "ع": 26,
31
- "غ": 27,
32
- "ف": 28,
33
- "ق": 29,
34
- "ك": 30,
35
- "ل": 31,
36
- "م": 32,
37
- "ن": 33,
38
- "ه": 34,
39
- "و": 35,
40
- "ى": 36,
41
- "ً": 37,
42
- "َ": 38,
43
- "ُ": 39,
44
- "ِ": 40,
45
- "ّ": 41,
46
- "ْ": 42,
47
- "ٓ": 43,
48
- "ٔ": 44,
49
- "ٰ": 45,
50
- "ٹ": 46,
51
- "پ": 47,
52
- "چ": 48,
53
- "ڈ": 49,
54
- "ڑ": 50,
55
- "ژ": 51,
56
- "ک": 52,
57
- "گ": 53,
58
- "ں": 54,
59
- "ھ": 55,
60
- "ہ": 56,
61
- "ۂ": 57,
62
- "ۃ": 58,
63
- "ی": 59,
64
- "ے": 60,
65
- "ۓ": 61,
66
- "": 62,
67
- "": 63,
68
- "": 64,
69
- "": 65,
70
- "": 66,
71
- "": 67,
72
- "": 68,
73
- "": 69,
74
- "": 70,
75
- "": 71,
76
- "": 72,
77
- "": 73,
78
- "": 74,
79
- "": 75,
80
- "": 76,
81
- "": 77
 
82
  }
 
1
  {
2
+ ",": 1,
3
+ "[PAD]": 81,
4
+ "[UNK]": 80,
5
  "|": 0,
6
+ "ؑ": 2,
7
+ "ؓ": 3,
8
+ "ؔ": 4,
9
+ "ء": 5,
10
+ "آ": 6,
11
+ "أ": 7,
12
+ "ؤ": 8,
13
+ "ئ": 9,
14
+ "ا": 10,
15
+ "ب": 11,
16
+ "ت": 12,
17
+ "ث": 13,
18
+ "ج": 14,
19
+ "ح": 15,
20
+ "خ": 16,
21
+ "د": 17,
22
+ "ذ": 18,
23
+ "ر": 19,
24
+ "ز": 20,
25
+ "س": 21,
26
+ "ش": 22,
27
+ "ص": 23,
28
+ "ض": 24,
29
+ "ط": 25,
30
+ "ظ": 26,
31
+ "ع": 27,
32
+ "غ": 28,
33
+ "ف": 29,
34
+ "ق": 30,
35
+ "ك": 31,
36
+ "ل": 32,
37
+ "م": 33,
38
+ "ن": 34,
39
+ "ه": 35,
40
+ "و": 36,
41
+ "ى": 37,
42
+ "ً": 38,
43
+ "َ": 39,
44
+ "ُ": 40,
45
+ "ِ": 41,
46
+ "ّ": 42,
47
+ "ْ": 43,
48
+ "ٓ": 44,
49
+ "ٔ": 45,
50
+ "ٗ": 46,
51
+ "ٰ": 47,
52
+ "ٹ": 48,
53
+ "پ": 49,
54
+ "چ": 50,
55
+ "ڈ": 51,
56
+ "ڑ": 52,
57
+ "ژ": 53,
58
+ "ک": 54,
59
+ "گ": 55,
60
+ "ں": 56,
61
+ "ھ": 57,
62
+ "ہ": 58,
63
+ "ۂ": 59,
64
+ "ۃ": 60,
65
+ "ی": 61,
66
+ "ے": 62,
67
+ "ۓ": 63,
68
+ "": 64,
69
+ "": 65,
70
+ "": 66,
71
+ "": 67,
72
+ "": 68,
73
+ "": 69,
74
+ "": 70,
75
+ "": 71,
76
+ "": 72,
77
+ "": 73,
78
+ "": 74,
79
+ "": 75,
80
+ "": 76,
81
+ "": 77,
82
+ "": 78,
83
+ "ﻮ": 79
84
  }