Always add gmask in token ids
tokenization_chatglm.py  +3 -14
@@ -326,22 +326,11 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         Returns:
             `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
         """
-        mask_ids = self.sp_tokenizer[self.mask_token]
-        gmask_ids = self.sp_tokenizer[self.gmask_token]
+        gmask_id = self.sp_tokenizer[self.gmask_token]
         eos_id = self.sp_tokenizer[self.eos_token]
-        if mask_ids not in token_ids_0 and gmask_ids not in token_ids_0:
-            token_ids_0 += [gmask_ids]
-
-        if token_ids_0[-1] != mask_ids and token_ids_0[-1] != gmask_ids:
-            token_ids_0 += [self.sp_tokenizer[self.end_token]]
-
-        token_ids_0 += [self.sp_tokenizer[self.bos_token]]
-
+        token_ids_0 = token_ids_0 + [gmask_id, self.sp_tokenizer[self.bos_token]]
         if token_ids_1 is not None:
-            if not token_ids_1 or token_ids_1[-1] != eos_id:
-                token_ids_1 += [eos_id]
-            token_ids_0 += token_ids_1
-
+            token_ids_0 = token_ids_0 + token_ids_1 + [eos_id]
         return token_ids_0

     def _pad(
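For reference, here is a minimal sketch (not the repository's code) of what the post-change `build_inputs_with_special_tokens` assembles. The gmask and bos IDs are now always appended to the first sequence, and a second sequence, if given, is concatenated and terminated with eos. The ID values below are made up purely for illustration.

```python
# Illustrative sketch only: GMASK_ID, BOS_ID and EOS_ID are made-up values,
# not the real ChatGLM vocabulary IDs.
from typing import List, Optional

GMASK_ID, BOS_ID, EOS_ID = 901, 902, 903

def build_inputs(token_ids_0: List[int],
                 token_ids_1: Optional[List[int]] = None) -> List[int]:
    # After this commit: gmask and bos are appended unconditionally,
    # with no check for a mask token already present in token_ids_0.
    token_ids_0 = token_ids_0 + [GMASK_ID, BOS_ID]
    if token_ids_1 is not None:
        # A second sequence is concatenated and terminated with eos.
        token_ids_0 = token_ids_0 + token_ids_1 + [EOS_ID]
    return token_ids_0

print(build_inputs([5, 6, 7]))          # [5, 6, 7, 901, 902]
print(build_inputs([5, 6, 7], [8, 9]))  # [5, 6, 7, 901, 902, 8, 9, 903]
```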