oweller2 committed on
Commit
5a1ce8b
·
1 Parent(s): d776f8b
Files changed (1) hide show
  1. tokenizer.py +4 -1
tokenizer.py CHANGED
@@ -8,5 +8,8 @@ class ModernDecoderBERTTokenizer(PreTrainedTokenizer):
8
  return [id for id in token_ids_0 if id != self.eos_token_id]
9
  return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
10
 
11
-
 
 
 
12
  AutoTokenizer.register("ModernDecoderBERTTokenizer", ModernDecoderBERTTokenizer)
 
8
  return [id for id in token_ids_0 if id != self.eos_token_id]
9
  return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
10
 
11
+ def get_vocab(self):
12
+ breakpoint()
13
+ return dict(self.vocab.items())
14
+
15
  AutoTokenizer.register("ModernDecoderBERTTokenizer", ModernDecoderBERTTokenizer)