oweller2
committed on
Commit
·
5a1ce8b
1
Parent(s):
d776f8b
vocab:
Browse files- tokenizer.py +4 -1
tokenizer.py
CHANGED
@@ -8,5 +8,8 @@ class ModernDecoderBERTTokenizer(PreTrainedTokenizer):
|
|
8 |
return [id for id in token_ids_0 if id != self.eos_token_id]
|
9 |
return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
|
10 |
|
11 |
-
|
|
|
|
|
|
|
12 |
AutoTokenizer.register("ModernDecoderBERTTokenizer", ModernDecoderBERTTokenizer)
|
|
|
8 |
return [id for id in token_ids_0 if id != self.eos_token_id]
|
9 |
return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
|
10 |
|
11 |
+
def get_vocab(self):
    """Return a new dict built from the entries of ``self.vocab``.

    A fresh dict is created on every call, so callers may add or remove
    keys on the result without changing ``self.vocab`` itself.
    (Presumably the mapping is token string -> token id; confirm against
    the code that populates ``self.vocab``.)

    Returns:
        dict: A shallow copy of ``self.vocab``.
    """
    # A leftover ``breakpoint()`` debug call used to sit here and stopped
    # every call to this method in the debugger; it has been removed.
    return dict(self.vocab.items())
|
14 |
+
|
15 |
# Register the tokenizer class with AutoTokenizer at import time, keyed by the
# string "ModernDecoderBERTTokenizer".
# NOTE(review): the transformers docs describe the first argument of
# AutoTokenizer.register as a config class — confirm a string key is intended.
AutoTokenizer.register("ModernDecoderBERTTokenizer", ModernDecoderBERTTokenizer)
|