mlabonne
/

phixtral-2x2_8

Text Generation

Mixture of Experts

cognitivecomputations/dolphin-2_6-phi-2

lxuechen/phi-2-dpo

Model card Files and versions

mlabonne committed on Jan 9, 2024

Commit

ba0a899

·

1 Parent(s): db615a2

Update modeling_phi.py

Files changed (1) hide show

modeling_phi.py +3 -7

modeling_phi.py CHANGED Viewed

@@ -294,15 +294,11 @@ class MoE(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        num_experts=2,
-        num_experts_per_tok=2,
-        num_shards=1,
-        **kwargs,
     ):
         super().__init__()
-        self.mlp = nn.ModuleList([MLP(config) for i in range(num_experts)])
-        self.gate = nn.Linear(config.n_embd, num_experts, bias=False)
-        self.num_experts_per_tok = num_experts_per_tok
     def forward(self, x):
         orig_shape = x.shape

     def __init__(
         self,
         config: PretrainedConfig,
     ):
         super().__init__()
+        self.mlp = nn.ModuleList([MLP(config) for i in range(config.num_local_experts)])
+        self.gate = nn.Linear(config.n_embd, config.num_local_experts, bias=False)
+        self.num_experts_per_tok = config.num_experts_per_tok
     def forward(self, x):
         orig_shape = x.shape