fix state_dict loading in MoE model
- config.json +1 -1
- modeling_llama_moe_hf.py +0 -9
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "JuncaiL/llama-265m",
   "add_weight_norm": false,
   "architectures": [
     "LlamaMoEForCausalLM"
modeling_llama_moe_hf.py
CHANGED
@@ -1670,12 +1670,3 @@ class LlamaMoEForCausalLM(LlamaMoEPreTrainedModel):
     def reset_experts(self):
         self.model.reset_experts()
 
-    @classmethod
-    def from_pretrained(cls, *model_args, **kwargs):
-        config = kwargs.pop("config", None)
-        model = cls(config)
-        state_dict = kwargs.pop("state_dict", None)
-        if state_dict is not None:
-            model.load_state_dict(state_dict)
-        return model
-
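
The deleted override bypassed the standard Hugging Face loading path: it discarded the positional model_args (so the checkpoint id passed by the caller was never used), built the model from a config kwarg alone, and only filled in weights when an explicit state_dict kwarg was supplied, leaving the parameters randomly initialized otherwise. With the override removed, calls fall through to the stock PreTrainedModel.from_pretrained, which fetches the config and weight files and loads the state dict itself. A minimal usage sketch, assuming the repo is consumed with trust_remote_code=True since the LlamaMoEForCausalLM class ships in modeling_llama_moe_hf.py:

from transformers import AutoModelForCausalLM

# Resolves to the stock PreTrainedModel.from_pretrained: fetches
# config.json and the weight files, builds LlamaMoEForCausalLM, and
# loads the checkpoint's state dict into it.
model = AutoModelForCausalLM.from_pretrained(
    "JuncaiL/llama-265m",
    trust_remote_code=True,  # custom model code lives in this repo
)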