OrionZheng committed
Commit: dc76bad
Parent: 0cd777f

Upload 3 files

modeling_openmoe.py CHANGED
@@ -28,9 +28,7 @@ from torch import nn
 from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
 from transformers.modeling_utils import PreTrainedModel
 from transformers.models.llama.configuration_llama import LlamaConfig
-# ========= Disable Flash Attn =============
 # from .llama_attn import LlamaAttention
-# ========= Disable Flash Attn =============
 
 from transformers.utils import (
     add_start_docstrings,
@@ -399,11 +397,7 @@ class OpenMoeAttention(nn.Module):
         value_states = repeat_kv(value_states, self.num_key_value_groups)
 
         if HAS_FLASH_ATTN and self.use_kernel:
-            # from flash_attn import flash_attn_func
-            # If we use `from flash_attn import flash_attn_func` directly,
-            # AutoModelForCausalLM.from_pretrained will treat flash_attn as a compulsory dependency and raise error if it cannot be found.
-            # Here is a workaround to avoid the error.
-            exec("from flash_attn import flash_attn_func")
+            from flash_attn import flash_attn_func
 
             query_states = query_states.transpose(1, 2)
             key_states = key_states.transpose(1, 2)
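
For context on the change above: the deleted comments explained that importing `flash_attn_func` directly would make flash-attn a compulsory dependency when the file is loaded via `AutoModelForCausalLM.from_pretrained`, which is why the old code hid the import behind `exec(...)`; the new code simply imports it inside the kernel branch. Below is a minimal, self-contained sketch of this availability-gated import pattern; the helper name `scaled_attention` and the fallback are illustrative, not part of the diff.

import importlib.util

# Probe for the optional kernel once; find_spec only checks availability, it does not import.
HAS_FLASH_ATTN = importlib.util.find_spec("flash_attn") is not None

def scaled_attention(query_states, key_states, value_states, use_kernel=True):
    """Illustrative dispatcher: use flash-attn when it is installed and requested."""
    if HAS_FLASH_ATTN and use_kernel:
        # Deferred import, as in the new code path: flash_attn is only imported when
        # this branch runs, so machines without it can still load the module.
        from flash_attn import flash_attn_func
        # flash_attn_func expects (batch, seq_len, num_heads, head_dim) tensors.
        return flash_attn_func(query_states, key_states, value_states, causal=True)
    # Hypothetical fallback: a plain PyTorch attention implementation would go here.
    raise NotImplementedError("fallback path omitted in this sketch")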
tokenization_openmoe.py ADDED
@@ -0,0 +1,22 @@
+from transformers import T5Tokenizer
+from typing import List, Optional, Tuple, Union
+
+class OpenMoeTokenizer(T5Tokenizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.padding_side = 'left'
+        self.add_bos_token = True
+        self.add_eos_token = False
+
+    def build_inputs_with_special_tokens(
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+    ) -> List[int]:
+        if self.add_eos_token:
+            token_ids_0 = self._add_eos_if_not_present(token_ids_0)
+        if self.add_bos_token:
+            token_ids_0 = [self.pad_token_id] + token_ids_0
+        if token_ids_1 is None:
+            return token_ids_0
+        else:
+            token_ids_1 = self._add_eos_if_not_present(token_ids_1)
+            return token_ids_0 + token_ids_1
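
The added tokenizer subclasses `T5Tokenizer` and, by default, prepends the pad token as a BOS marker, appends no EOS (`add_bos_token=True`, `add_eos_token=False`), and pads on the left for batched generation. A small usage sketch of what `build_inputs_with_special_tokens` produces; the checkpoint name is illustrative (the removed `tokenizer_file` path below suggests the vocabulary was derived from `google/umt5-small`).

from tokenization_openmoe import OpenMoeTokenizer  # the file added in this commit

# Illustrative vocabulary source; any T5-style sentencepiece checkpoint would do.
tok = OpenMoeTokenizer.from_pretrained("google/umt5-small")

ids = tok("Hello world", add_special_tokens=False)["input_ids"]
with_special = tok.build_inputs_with_special_tokens(ids)

# With the defaults above, the pad token id is prepended as BOS and no EOS is appended.
assert with_special == [tok.pad_token_id] + ids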
tokenizer_config.json CHANGED
@@ -2745,8 +2745,13 @@
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "OpenMoeTokenizer",
-  "tokenizer_file": "/home1/08125/fuzhao/.cache/huggingface/hub/models--google--umt5-small/snapshots/8c63c2b77efbf8e41206a2c8d994846cc9392360/tokenizer.json",
   "trust_remote_code": true,
   "unk_token": "<unk>",
-  "verbose": false
+  "verbose": false,
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_openmoe.OpenMoeTokenizer",
+      null
+    ]
+  }
 }
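
The new `auto_map` entry is what connects this config to the added `tokenization_openmoe.py`: it tells `AutoTokenizer` which class to load when remote code is trusted, and the `null` second slot means no fast-tokenizer class is provided. A sketch of the expected loading call; the repo id is a placeholder.

from transformers import AutoTokenizer

# trust_remote_code=True is required so that the class referenced by "auto_map"
# (tokenization_openmoe.OpenMoeTokenizer) can be fetched and imported from the repo.
tokenizer = AutoTokenizer.from_pretrained(
    "OrionZheng/openmoe-base",  # placeholder repo id; substitute the repo this commit belongs to
    trust_remote_code=True,
)
print(type(tokenizer).__name__)  # expected: OpenMoeTokenizer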