Răsfoiți Sursa

add llama-3.1-405b-8bit

Alex Cheema 5 luni în urmă
părinte
comite
c8438b6d23
2 fișiere modificate, cu 2 adăugiri și 0 ștergeri
  1. 1 0
      exo/models.py
  2. 1 0
      exo/tinychat/index.html

+ 1 - 0
exo/models.py

@@ -17,6 +17,7 @@ model_base_shards = {
     "TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B-Instruct", start_layer=0, end_layer=0, n_layers=80),
   },
   "llama-3.1-405b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126),},
+  "llama-3.1-405b-8bit": {"MLXDynamicShardInferenceEngine": Shard(model_id="IntuitIntel/Llama-3.1-405B-Instruct", start_layer=0, end_layer=0, n_layers=126),},
   "llama-3-8b": {
     "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
     "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", start_layer=0, end_layer=0, n_layers=32),

+ 1 - 0
exo/tinychat/index.html

@@ -36,6 +36,7 @@
 <option value="llama-3.1-70b">Llama 3.1 70B</option>
 <option value="llama-3.1-70b-bf16">Llama 3.1 70B (BF16)</option>
 <option value="llama-3.1-405b">Llama 3.1 405B</option>
+<option value="llama-3.1-405b-8bit">Llama 3.1 405B (8-bit)</option>
 <option value="llama-3-8b">Llama 3 8B</option>
 <option value="llama-3-70b">Llama 3 70B</option>
 <option value="nemotron-70b">Nemotron 70B</option>