|
@@ -59,4 +59,11 @@ model_base_shards = {
|
|
|
"qwen-2.5-math-72b": {
|
|
|
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Math-72B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
|
|
|
},
|
|
|
+ ### nemotron (Llama-3.1-Nemotron-70B-Instruct: 4-bit and bf16 variants from mlx-community)
|
|
|
+ "nemotron-70b": {
|
|
|
+ "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF_4bit", start_layer=0, end_layer=0, n_layers=80),
|
|
|
+ },
|
|
|
+ "nemotron-70b-bf16": {
|
|
|
+ "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.1-Nemotron-70B-Instruct-HF-bf16", start_layer=0, end_layer=0, n_layers=80),
|
|
|
+ },
|
|
|
}
|