|
@@ -32,7 +32,6 @@ model_base_shards = {
|
|
|
### mistral
|
|
|
"mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),},
|
|
|
"mistral-large": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Large-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=88),},
|
|
|
- "ministral-8b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Ministral-8B-Instruct-2410-4bit", start_layer=0, end_layer=0, n_layers=32),},
|
|
|
### deepseek
|
|
|
"deepseek-coder-v2-lite": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", start_layer=0, end_layer=0, n_layers=27),},
|
|
|
"deepseek-coder-v2.5": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-V2.5-MLX-AQ4_1_64", start_layer=0, end_layer=0, n_layers=60),},
|