Fix piece ID out-of-range error
#20
by
zRzRzRzRzRzRzR
- opened
- tokenization_chatglm.py +1 -1
tokenization_chatglm.py
CHANGED
@@ -83,7 +83,7 @@ class SPTokenizer:
|
|
83 |
"""Converts an index (integer) in a token (str) using the vocab."""
|
84 |
if index in self.index_special_tokens:
|
85 |
return self.index_special_tokens[index]
|
86 |
-
if index in [self.eos_id, self.bos_id, self.pad_id] or index < 0:
|
87 |
return ""
|
88 |
return self.sp_model.IdToPiece(index)
|
89 |
|
|
|
83 |
"""Converts an index (integer) in a token (str) using the vocab."""
|
84 |
if index in self.index_special_tokens:
|
85 |
return self.index_special_tokens[index]
|
86 |
+
if index in [self.eos_id, self.bos_id, self.pad_id] or index < 0 or index > self.sp_model.vocab_size():
|
87 |
return ""
|
88 |
return self.sp_model.IdToPiece(index)
|
89 |
|