|
@@ -11,10 +11,11 @@ import traceback
|
|
from exo import DEBUG, VERSION
|
|
from exo import DEBUG, VERSION
|
|
from exo.download.download_progress import RepoProgressEvent
|
|
from exo.download.download_progress import RepoProgressEvent
|
|
from exo.helpers import PrefixDict
|
|
from exo.helpers import PrefixDict
|
|
|
|
+from exo.inference.inference_engine import inference_engine_classes
|
|
from exo.inference.shard import Shard
|
|
from exo.inference.shard import Shard
|
|
from exo.inference.tokenizers import resolve_tokenizer
|
|
from exo.inference.tokenizers import resolve_tokenizer
|
|
from exo.orchestration import Node
|
|
from exo.orchestration import Node
|
|
-from exo.models import build_base_shard, model_cards, get_repo
|
|
|
|
|
|
+from exo.models import build_base_shard, model_cards, get_repo, pretty_name
|
|
from typing import Callable
|
|
from typing import Callable
|
|
|
|
|
|
|
|
|
|
@@ -171,6 +172,7 @@ class ChatGPTAPI:
|
|
cors.add(self.app.router.add_post("/chat/completions", self.handle_post_chat_completions), {"*": cors_options})
|
|
cors.add(self.app.router.add_post("/chat/completions", self.handle_post_chat_completions), {"*": cors_options})
|
|
cors.add(self.app.router.add_post("/v1/chat/completions", self.handle_post_chat_completions), {"*": cors_options})
|
|
cors.add(self.app.router.add_post("/v1/chat/completions", self.handle_post_chat_completions), {"*": cors_options})
|
|
cors.add(self.app.router.add_get("/v1/download/progress", self.handle_get_download_progress), {"*": cors_options})
|
|
cors.add(self.app.router.add_get("/v1/download/progress", self.handle_get_download_progress), {"*": cors_options})
|
|
|
|
+ cors.add(self.app.router.add_get("/modelpool", self.handle_model_support), {"*": cors_options})
|
|
|
|
|
|
self.static_dir = Path(__file__).parent.parent/"tinychat"
|
|
self.static_dir = Path(__file__).parent.parent/"tinychat"
|
|
self.app.router.add_get("/", self.handle_root)
|
|
self.app.router.add_get("/", self.handle_root)
|
|
@@ -198,6 +200,9 @@ class ChatGPTAPI:
|
|
async def handle_root(self, request):
    """Serve the tinychat single-page UI for any request to the site root."""
    index_path = self.static_dir / "index.html"
    return web.FileResponse(index_path)
|
|
|
|
|
|
|
|
async def handle_model_support(self, request):
    """Respond with the pool of models runnable on this topology.

    Collects the inference-engine names advertised by every node in the
    topology, maps them to engine class names (always including this node's
    own engine class), and keeps only the models whose card lists a repo for
    each of those engine classes.

    Returns:
        web.json_response: {"model pool": {model_id: display_name, ...}}
    """
    # Flatten the per-node engine lists from the topology, dropping None
    # entries before mapping (mirrors the original filter order).
    engine_names = [
        name
        for node_engines in self.node.topology_inference_engines_pool
        for name in node_engines
        if name is not None
    ]
    # NOTE(review): an engine name missing from inference_engine_classes maps
    # to None here; None is never a key of a model card's "repo" dict, so one
    # unmapped engine excludes every model. Preserved as-is — confirm intended.
    mapped_classes = [inference_engine_classes.get(name, None) for name in engine_names]
    # Dedupe while preserving first-seen order; this node's own engine class
    # is always required.
    required_engines = list(dict.fromkeys(mapped_classes + [self.inference_engine_classname]))
    # A model is in the pool only if its card has a repo for every required engine.
    supported_models = [
        model_id
        for model_id, card in model_cards.items()
        if all(engine in card["repo"] for engine in required_engines)
    ]
    return web.json_response({
        "model pool": {model_id: pretty_name.get(model_id, model_id) for model_id in supported_models}
    })
|
|
async def handle_get_models(self, request):
    """List every known model as an OpenAI-style /models response entry."""
    entries = []
    for model_name in model_cards:
        entries.append({"id": model_name, "object": "model", "owned_by": "exo", "ready": True})
    return web.json_response(entries)
|
|
|
|
|