nbtpj committed on
Commit c540716
1 Parent(s): 6896570

Upload tokenizer

added_tokens.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "</background>": 50266,
+   "</evidence>": 50277,
+   "</int>": 50273,
+   "</out>": 50275,
+   "</pop>": 50271,
+   "</ref>": 50268,
+   "<background>": 50265,
+   "<evidence>": 50276,
+   "<int>": 50272,
+   "<out>": 50274,
+   "<pop>": 50270,
+   "<ref>": 50267,
+   "<sep>": 50269
+ }
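added_tokens.json maps the thirteen new section tags to IDs 50265–50277, directly after the last ID of the stock BART vocabulary (50264). A minimal sketch of how an equivalent mapping is produced, assuming the base tokenizer is facebook/bart-large (the base checkpoint is not stated in this commit):

```python
from transformers import BartTokenizer

# Assumption: the tags were appended to the stock facebook/bart-large
# vocabulary, whose last base ID is 50264. Adding them in the order below
# reproduces the IDs recorded in added_tokens.json.
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
tokenizer.add_special_tokens({
    "additional_special_tokens": [
        "<background>", "</background>", "<ref>", "</ref>", "<sep>",
        "<pop>", "</pop>", "<int>", "</int>", "<out>", "</out>",
        "<evidence>", "</evidence>",
    ]
})

assert tokenizer.convert_tokens_to_ids("<background>") == 50265
assert tokenizer.convert_tokens_to_ids("</evidence>") == 50277
```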
merges.txt ADDED
The diff for this file is too large to render. See raw diff
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "additional_special_tokens": [
+     "<background>",
+     "</background>",
+     "<ref>",
+     "</ref>",
+     "<sep>",
+     "<pop>",
+     "</pop>",
+     "<int>",
+     "</int>",
+     "<out>",
+     "</out>",
+     "<evidence>",
+     "</evidence>"
+   ],
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
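special_tokens_map.json registers the same thirteen tags as additional_special_tokens, so the tokenizer treats each tag as a single, unsplittable token; <mask> keeps "lstrip": true, the usual BART/RoBERTa setting that lets the mask token absorb the space before it. A quick atomicity check, continuing from the sketch above:

```python
# Each tag encodes to exactly one ID instead of being split into BPE pieces.
ids = tokenizer("<background> some context </background>")["input_ids"]
tokens = tokenizer.convert_ids_to_tokens(ids)
# tokens[0] is <s> and tokens[-1] is </s>; the tags survive intact.
assert tokens[1] == "<background>" and tokens[-2] == "</background>"
```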
tokenizer_config.json ADDED
@@ -0,0 +1,79 @@
+ {
+   "add_prefix_space": false,
+   "additional_special_tokens": [
+     "<background>",
+     "</background>",
+     "<ref>",
+     "</ref>",
+     "<sep>",
+     "<pop>",
+     "</pop>",
+     "<int>",
+     "</int>",
+     "<out>",
+     "</out>",
+     "<evidence>",
+     "</evidence>"
+   ],
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 1024,
+   "pad_token": {
+     "__type": "AddedToken",
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff
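tokenizer_config.json pins "tokenizer_class": "BartTokenizer" and "model_max_length": 1024 (BART's sequence limit), so with all four files in place (the two configs above plus vocab.json and merges.txt for the byte-level BPE) the tokenizer loads in one call. A sketch, with a placeholder repo id since the target repository is not shown in this commit view:

```python
from transformers import AutoTokenizer

# "nbtpj/<repo>" is a placeholder; substitute the Hub repository this
# commit was pushed to.
tokenizer = AutoTokenizer.from_pretrained("nbtpj/<repo>")

print(type(tokenizer).__name__)    # BartTokenizerFast (or BartTokenizer with use_fast=False)
print(tokenizer.model_max_length)  # 1024, as set in tokenizer_config.json

enc = tokenizer(
    "<background> prior findings </background> <sep> <out> mortality </out>",
    truncation=True,  # caps inputs at model_max_length
)
print(enc["input_ids"][:4])
```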