|
@@ -17,6 +17,7 @@ model_base_shards = {
|
|
|
"TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B-Instruct", start_layer=0, end_layer=0, n_layers=80),
|
|
|
},
|
|
|
"llama-3.1-405b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126),},
|
|
|
+ "llama-3.1-405b-8bit": {"MLXDynamicShardInferenceEngine": Shard(model_id="IntuitIntel/Llama-3.1-405B-Instruct", start_layer=0, end_layer=0, n_layers=126),},
|
|
|
"llama-3-8b": {
|
|
|
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
|
|
|
"TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", start_layer=0, end_layer=0, n_layers=32),
|