fix
modeling_llama.py  CHANGED  +2 -5
@@ -69,7 +69,6 @@ def convert_linear_to_moe(
     in_features: int,
     out_features: int,
     bias: bool = True,
-    show_debug: bool = False,
 ):
     """Converts nn.Linear to MoeLayer
     Args:
@@ -89,7 +88,6 @@ def convert_linear_to_moe(
             bias=bias,
             name=name,
             layer_idx=layer_idx,
-            show_debug=show_debug
         )
     else:
         return MoeLayer(
@@ -145,7 +143,7 @@ class MoeLayer(nn.Module):
         return results

 class LoRAMoeLayer(torch.nn.Module):
-    def __init__(self, config, in_features, out_features, bias, name = "", layer_idx = -1
+    def __init__(self, config, in_features, out_features, bias, name = "", layer_idx = -1) -> None:
         super().__init__()

         self.config = config
@@ -195,8 +193,7 @@ class LoRAMoeLayer(torch.nn.Module):
         weights, selected_experts = torch.topk(
             gate_logits, self.num_experts_per_tok
         ) # b,s,n
-        if
-        if self._layer_idx == 0 or self._layer_idx == 16 or self._layer_idx == 31:
+        if getattr(self.config, "show_debug", False) and self._layer_idx == 0 or self._layer_idx == 16 or self._layer_idx == 31:
             print(f"{self._name}_{self._layer_idx}: {selected_experts}")
             print("-"*10)
         weights = F.softmax(weights, dim=2, dtype=torch.float).to(
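For context, the changed condition now reads the debug flag from the model config via getattr instead of a show_debug constructor argument. The sketch below is illustrative only (the helper function and the stand-in config class are not part of the commit); it reproduces the expression from the diff to show how Python's operator precedence groups it and how the flag would be toggled:

```python
# Illustrative sketch, not part of the commit: only the expression and the
# attribute name "show_debug" come from the diff above.
def should_print(config, layer_idx):
    # `and` binds tighter than `or`, so this groups as
    # (show_debug and layer_idx == 0) or layer_idx == 16 or layer_idx == 31
    return getattr(config, "show_debug", False) and layer_idx == 0 \
        or layer_idx == 16 or layer_idx == 31

class DummyConfig:
    pass  # no show_debug attribute, so getattr falls back to False

print(should_print(DummyConfig(), 0))   # False: flag defaults to False
print(should_print(DummyConfig(), 16))  # True: the `or` branches ignore the flag

config = DummyConfig()
config.show_debug = True                # how the flag would be enabled on the config
print(should_print(config, 0))          # True
```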