OrionZheng committed
Commit dc76bad • 1 Parent(s): 0cd777f
Upload 3 files
Browse files:
- modeling_openmoe.py   +1 -7
- tokenization_openmoe.py   +22 -0
- tokenizer_config.json   +7 -2
modeling_openmoe.py CHANGED
@@ -28,9 +28,7 @@ from torch import nn
 from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
 from transformers.modeling_utils import PreTrainedModel
 from transformers.models.llama.configuration_llama import LlamaConfig
-# ========= Disable Flash Attn =============
 # from .llama_attn import LlamaAttention
-# ========= Disable Flash Attn =============

 from transformers.utils import (
     add_start_docstrings,
@@ -399,11 +397,7 @@ class OpenMoeAttention(nn.Module):
         value_states = repeat_kv(value_states, self.num_key_value_groups)

         if HAS_FLASH_ATTN and self.use_kernel:
-
-            # If we use `from flash_attn import flash_attn_func` directly,
-            # AutoModelForCausalLM.from_pretrained will treat flash_attn as a compulsory dependency and raise error if it cannot be found.
-            # Here is a workaround to avoid the error.
-            exec("from flash_attn import flash_attn_func")
+            from flash_attn import flash_attn_func

             query_states = query_states.transpose(1, 2)
             key_states = key_states.transpose(1, 2)
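The removed comment explains the motivation for the old code: a module-level `from flash_attn import flash_attn_func` would make flash_attn a hard dependency of `AutoModelForCausalLM.from_pretrained`, so the import was hidden behind `exec()`. This commit replaces the `exec()` call with a plain import placed inside the `HAS_FLASH_ATTN and self.use_kernel` branch, which only runs when the kernel path is actually taken. Below is a minimal sketch of that deferred-import pattern; only the `HAS_FLASH_ATTN` flag name and the `flash_attn_func` import appear in the diff, while the importlib probe and the fallback are assumptions for illustration.

# Sketch of the optional-dependency pattern the hunk above relies on.
# Assumption: HAS_FLASH_ATTN is set by probing for the package instead of
# importing it at module load time, so transformers can load the model code
# even when flash_attn is not installed.
import importlib.util

HAS_FLASH_ATTN = importlib.util.find_spec("flash_attn") is not None

def attention_kernel(query_states, key_states, value_states, use_kernel=True):
    if HAS_FLASH_ATTN and use_kernel:
        # Deferred import: flash_attn is only required when this branch runs.
        from flash_attn import flash_attn_func
        return flash_attn_func(query_states, key_states, value_states, causal=True)
    # Hypothetical fallback; the real file falls back to a standard attention path.
    raise NotImplementedError("fallback attention path omitted in this sketch")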
tokenization_openmoe.py ADDED
@@ -0,0 +1,22 @@
+from transformers import T5Tokenizer
+from typing import List, Optional, Tuple, Union
+
+class OpenMoeTokenizer(T5Tokenizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.padding_side = 'left'
+        self.add_bos_token = True
+        self.add_eos_token = False
+
+    def build_inputs_with_special_tokens(
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+    ) -> List[int]:
+        if self.add_eos_token:
+            token_ids_0 = self._add_eos_if_not_present(token_ids_0)
+        if self.add_bos_token:
+            token_ids_0 = [self.pad_token_id] + token_ids_0
+        if token_ids_1 is None:
+            return token_ids_0
+        else:
+            token_ids_1 = self._add_eos_if_not_present(token_ids_1)
+            return token_ids_0 + token_ids_1
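The new tokenizer subclasses T5Tokenizer but defaults to left padding, prepends the pad token id as a BOS marker, and appends no EOS to single sequences. A minimal behavioral sketch follows, assuming the repo's tokenizer files are available in a local directory; the path "./openmoe" and the example text are illustrative assumptions, not part of the commit.

# Behavioral sketch for the class added above.
# Assumption: this repo's tokenizer files (SentencePiece model, configs) have
# been downloaded to ./openmoe.
from tokenization_openmoe import OpenMoeTokenizer

tokenizer = OpenMoeTokenizer.from_pretrained("./openmoe")
ids = tokenizer.encode("Hello world", add_special_tokens=False)

# With add_bos_token=True and add_eos_token=False (set in __init__), the pad
# token id is prepended as the BOS marker and no EOS is appended:
assert tokenizer.build_inputs_with_special_tokens(ids) == [tokenizer.pad_token_id] + ids

# padding_side='left' pads batches on the left of each sequence, the usual
# layout for decoder-only generation.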
tokenizer_config.json CHANGED
@@ -2745,8 +2745,13 @@
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "OpenMoeTokenizer",
-  "tokenizer_file": "/home1/08125/fuzhao/.cache/huggingface/hub/models--google--umt5-small/snapshots/8c63c2b77efbf8e41206a2c8d994846cc9392360/tokenizer.json",
   "trust_remote_code": true,
   "unk_token": "<unk>",
-  "verbose": false
+  "verbose": false,
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_openmoe.OpenMoeTokenizer",
+      null
+    ]
+  }
 }
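The added auto_map entry is what lets AutoTokenizer resolve the custom class: the first list element names the slow tokenizer ("tokenization_openmoe.OpenMoeTokenizer") and the second is null because no fast tokenizer is registered. A short loading sketch is below; the repo id is a hypothetical placeholder for illustration, not taken from this commit.

# Loading sketch for the auto_map entry above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "OrionZheng/openmoe-base",  # hypothetical repo id, for illustration only
    trust_remote_code=True,     # required so the custom tokenizer code is executed
)
print(type(tokenizer).__name__)  # expected: OpenMoeTokenizer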