
Revert "Merge pull request #573 from damho1104/feature/add-exaone-3.5-model"

This reverts commit 4eb6a6a74ab96019b0201f24815a5735f4520a0e, reversing
changes made to fdc3b5ac0268254fd6a9d9d7c615ea9b102f25bd.
Alex Cheema 4 months ago
parent commit e08522ee97
3 changed files with 1 addition and 85 deletions
  1. exo/inference/mlx/models/exaone.py (+0 -80)
  2. exo/models.py (+0 -4)
  3. setup.py (+1 -1)

+ 0 - 80
exo/inference/mlx/models/exaone.py

@@ -1,80 +0,0 @@
-from dataclasses import dataclass, field
-import mlx.core as mx
-import mlx.nn as nn
-from mlx_lm.models.base import create_attention_mask
-from mlx_lm.models.exaone import TransformerBlock, ModelArgs
-from ...shard import Shard
-from .base import IdentityBlock
-
-
-@dataclass
-class ModelArgs(ModelArgs):
-    shard: Shard = field(default_factory=lambda: Shard("", 0, 0, 0))
-
-    def __post_init__(self):
-        # super().__post_init__()  # Ensure parent initializations are respected
-
-        if isinstance(self.shard, Shard):
-            return
-        if not isinstance(self.shard, dict):
-            raise TypeError(f"Expected shard to be a Shard instance or a dict, got {type(self.shard)} instead")
-
-        self.shard = Shard(**self.shard)
-
-
-class ExaoneModel(nn.Module):
-    def __init__(self, args: ModelArgs):
-        super().__init__()
-        self.wte = nn.Embedding(args.vocab_size, args.hidden_size)
-        self.h = [TransformerBlock(args) for _ in range(args.num_layers)]
-        self.ln_f = nn.RMSNorm(args.hidden_size, eps=args.layer_norm_epsilon)
-
-    def __call__(
-        self,
-        inputs: mx.array,
-        cache=None,
-    ):
-        h = self.wte(inputs)
-        mask = create_attention_mask(h, cache)
-
-        if cache is None:
-            cache = [None] * len(self.h)
-
-        for layer, c in zip(self.h, cache):
-            h = layer(h, mask, cache=c)
-
-        return self.ln_f(h)
-
-
-class Model(nn.Module):
-    def __init__(self, args: ModelArgs):
-        super().__init__()
-        self.args = args
-        self.model_type = args.model_type
-        self.transformer = ExaoneModel(args)
-        if not args.tie_word_embeddings:
-            self.lm_head = nn.Linear(args.hidden_size, args.vocab_size, bias=False)
-
-    def __call__(
-        self,
-        inputs: mx.array,
-        cache=None,
-    ):
-        out = self.transformer(inputs, cache)
-        if self.args.tie_word_embeddings:
-            out = self.transformer.wte.as_linear(out)
-        else:
-            out = self.lm_head(out)
-        return out
-
-    @property
-    def layers(self):
-        return self.transformer.h
-
-    @property
-    def head_dim(self):
-        return self.args.head_dim
-
-    @property
-    def n_kv_heads(self):
-        return self.args.num_key_value_heads

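For reference, the core of the deleted wrapper is the shard coercion in `ModelArgs.__post_init__`: exo hands shard metadata in either as a `Shard` instance or as a plain dict (e.g. deserialized from a config), and the dataclass normalizes the dict form. Below is a minimal, self-contained sketch of that pattern; the stand-in `Shard` field names are assumptions for illustration only, the real class lives in `exo/inference/shard.py`.

```python
from dataclasses import dataclass, field


# Stand-in for exo's Shard (exo/inference/shard.py); field names are
# assumptions chosen to match the four positional defaults used above.
@dataclass(frozen=True)
class Shard:
    model_id: str
    start_layer: int
    end_layer: int
    n_layers: int


@dataclass
class ModelArgs:
    # Accept either a Shard or a dict and normalize to Shard.
    shard: Shard = field(default_factory=lambda: Shard("", 0, 0, 0))

    def __post_init__(self):
        if isinstance(self.shard, Shard):
            return
        if not isinstance(self.shard, dict):
            raise TypeError(f"Expected shard to be a Shard instance or a dict, got {type(self.shard)} instead")
        self.shard = Shard(**self.shard)


# Both construction paths end up with an identical, typed shard.
a = ModelArgs(shard=Shard("exaone-3.5-2.4b", 0, 29, 30))
b = ModelArgs(shard={"model_id": "exaone-3.5-2.4b", "start_layer": 0, "end_layer": 29, "n_layers": 30})
assert a == b
```
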
+ 0 - 4
exo/models.py

@@ -113,8 +113,6 @@ model_cards = {
   "gemma2-27b": { "layers": 46, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/gemma-2-27b-it-4bit", }, },
   # dummy
   "dummy": { "layers": 8, "repo": { "DummyInferenceEngine": "dummy", }, },
-  "exaone-3.5-7.8b": {"layers": 32, "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/EXAONE-3.5-7.8B-Instruct-4bit"}, },
-  "exaone-3.5-2.4b": {"layers": 30, "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/EXAONE-3.5-2.4B-Instruct-4bit"}, },
 }
 
 pretty_name = {
@@ -153,8 +151,6 @@ pretty_name = {
   "qwen-2.5-math-72b": "Qwen 2.5 72B (Math)",
   "llama-3-8b": "Llama 3 8B",
   "llama-3-70b": "Llama 3 70B",
-  "exaone-3.5-2.4b": "EXAONE-3.5 2.4B",
-  "exaone-3.5-7.8b": "EXAONE-3.5 7.8B",
 }
 
 def get_repo(model_id: str, inference_engine_classname: str) -> Optional[str]:

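The `get_repo` signature visible as context at the end of this hunk is what consumes the `model_cards` entries the revert removes; its body is not shown here, so the sketch below is only a plausible lookup with the same shape, not the repository's actual implementation.

```python
from typing import Optional

# Trimmed model_cards with the same nesting as exo/models.py.
model_cards = {
  "gemma2-27b": { "layers": 46, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/gemma-2-27b-it-4bit", }, },
  "dummy": { "layers": 8, "repo": { "DummyInferenceEngine": "dummy", }, },
}

def get_repo(model_id: str, inference_engine_classname: str) -> Optional[str]:
  # Hypothetical body: resolve the repo registered for this engine,
  # returning None when the model card (or engine entry) is absent.
  return model_cards.get(model_id, {}).get("repo", {}).get(inference_engine_classname)

assert get_repo("gemma2-27b", "MLXDynamicShardInferenceEngine") == "mlx-community/gemma-2-27b-it-4bit"
assert get_repo("exaone-3.5-7.8b", "MLXDynamicShardInferenceEngine") is None  # card removed by this revert
```
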
+ 1 - 1
setup.py

@@ -36,7 +36,7 @@ extras_require = {
   ],
   "apple_silicon": [
     "mlx==0.20.0",
-    "mlx-lm==0.20.5",
+    "mlx-lm==0.19.3",
   ],
 }
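
Since the apple_silicon extra now pins `mlx==0.20.0` and `mlx-lm==0.19.3` again, an environment can be checked against those pins with the standard-library package metadata; this is a generic, hypothetical helper and not part of exo.

```python
from importlib.metadata import PackageNotFoundError, version

# Pins from the apple_silicon extra after this revert.
PINNED = {"mlx": "0.20.0", "mlx-lm": "0.19.3"}

for package, pinned in PINNED.items():
  try:
    installed = version(package)
  except PackageNotFoundError:
    print(f"{package}: not installed (pinned to {pinned})")
    continue
  note = "matches pin" if installed == pinned else f"differs from pin {pinned}"
  print(f"{package}: {installed} ({note})")
```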