Răsfoiți Sursa

Merge pull request #431 from exo-explore/qwen32b

add qwen2.5 coder 3b,14b,32b
Alex Cheema 6 luni în urmă
părinte
comite
4713bc5acd
2 a modificat fișierele cu 6 adăugiri și 1 ștergeri
  1. 3 1
      exo/models.py
  2. 3 0
      exo/tinychat/index.html

+ 3 - 1
exo/models.py

@@ -42,8 +42,10 @@ model_base_shards = {
  "llava-1.5-7b-hf": {"MLXDynamicShardInferenceEngine": Shard(model_id="llava-hf/llava-1.5-7b-hf", start_layer=0, end_layer=0, n_layers=32),},
  ### qwen
  "qwen-2.5-coder-1.5b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28),},
+  "qwen-2.5-coder-3b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-3B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=36),},
  "qwen-2.5-coder-7b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28),},
-  "qwen-2.5-coder-32b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28),},
+  "qwen-2.5-coder-14b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-14B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=48),},
+  "qwen-2.5-coder-32b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=64),},
  "qwen-2.5-7b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28),},
  "qwen-2.5-math-7b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Math-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28),},
  "qwen-2.5-14b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-14B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=48),},

+ 3 - 0
exo/tinychat/index.html

@@ -47,7 +47,10 @@
 <option value="deepseek-coder-v2.5">Deepseek Coder V2.5</option>
 <option value="llava-1.5-7b-hf">LLaVa 1.5 7B (Vision Model)</option>
 <option value="qwen-2.5-coder-1.5b">Qwen 2.5 Coder 1.5B</option>
+<option value="qwen-2.5-coder-3b">Qwen 2.5 Coder 3B</option>
 <option value="qwen-2.5-coder-7b">Qwen 2.5 Coder 7B</option>
+<option value="qwen-2.5-coder-14b">Qwen 2.5 Coder 14B</option>
+<option value="qwen-2.5-coder-32b">Qwen 2.5 Coder 32B</option>
 <option value="qwen-2.5-7b">Qwen 2.5 7B</option>
 <option value="qwen-2.5-math-7b">Qwen 2.5 7B (Math)</option>
 <option value="qwen-2.5-14b">Qwen 2.5 14B</option>