před 11 měsíci · 5c67e24c35
--- a/exo/api/chatgpt_api.py
+++ b/exo/api/chatgpt_api.py
@@ -8,7 +8,7 @@ from typing import List, Literal, Union, Dict
 
															 from aiohttp import web
														
 
															 import aiohttp_cors
														
 
															 from exo import DEBUG, VERSION
														
 
															-from exo.helpers import terminal_link
														
 
															+from exo.helpers import terminal_link, PrefixDict
														
 
															 from exo.inference.shard import Shard
														
 
															 from exo.orchestration import Node
														
@@ -49,6 +49,7 @@ shard_mappings = {
 
															 }
														
 
															+
														
 
															 class Message:
														
 
															     def __init__(self, role: str, content: Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]):
														
 
															         self.role = role
														
@@ -234,6 +235,11 @@ def parse_chat_request(data: dict):
 
															     data.get("temperature", 0.0),
														
 
															   )
														
 
															+class PromptSession:
														
 
															+  def __init__(self, request_id: str, timestamp: int, prompt: str):
														
 
															+    self.request_id = request_id
														
 
															+    self.timestamp = timestamp
														
 
															+    self.prompt = prompt
														
 
															 class ChatGPTAPI:
														
 
															   def __init__(self, node: Node, inference_engine_classname: str, response_timeout_secs: int = 90):
														
@@ -241,6 +247,7 @@ class ChatGPTAPI:
 
															     self.inference_engine_classname = inference_engine_classname
														
 
															     self.response_timeout_secs = response_timeout_secs
														
 
															     self.app = web.Application(client_max_size=100 * 1024 * 1024)  # 100MB to support image upload
														
 
															+    self.prompts: PrefixDict[str, PromptSession] = PrefixDict()
														
 
															     self.prev_token_lens: Dict[str, int] = {}
														
 
															     self.stream_tasks: Dict[str, asyncio.Task] = {}
														
 
															     cors = aiohttp_cors.setup(self.app)
														
@@ -293,12 +300,24 @@ class ChatGPTAPI:
 
															         {"detail": f"Unsupported model: {chat_request.model} with inference engine {self.inference_engine_classname}. Supported models for this engine: {supported_models}"},
														
 
															         status=400,
														
 
															       )
														
 
															-    request_id = str(uuid.uuid4())
														
 
															     tokenizer = await resolve_tokenizer(shard.model_id)
														
 
															     if DEBUG >= 4: print(f"Resolved tokenizer: {tokenizer}")
														
 
															     prompt, image_str = build_prompt(tokenizer, chat_request.messages)
														
 
															+    request_id = None
														
 
															+    match = self.prompts.find_longest_prefix(prompt)
														
 
															+    if match:
														
 
															+        if DEBUG >= 2:
														
 
															+            print(f"Prompt for request starts with previous prompt {len(match[1].prompt)} of {len(prompt)}: {match[1].prompt}")
														
 
															+        request_id = match[1].request_id
														
 
															+        self.prompts.add(prompt, PromptSession(request_id=request_id, timestamp=int(time.time()), prompt=prompt))
														
 
															+        # remove the matching prefix from the prompt
														
 
															+        prompt = prompt[len(match[1].prompt):]
														
 
															+    else:
														
 
															+      request_id = str(uuid.uuid4())
														
 
															+      self.prompts.add(prompt, PromptSession(request_id=request_id, timestamp=int(time.time()), prompt=prompt))
														
 
															+
														
 
															     callback_id = f"chatgpt-api-wait-response-{request_id}"
														
 
															     callback = self.node.on_token.register(callback_id)
														
--- a/exo/helpers.py
+++ b/exo/helpers.py
@@ -1,6 +1,7 @@
 
															 import os
														
 
															 import asyncio
														
 
															-from typing import Any, Callable, Coroutine, TypeVar, Optional, Dict, Generic, Tuple
														
 
															+from typing import Any, Callable, TypeVar, Optional, Dict, Generic, Tuple, List
														
 
															+from collections import defaultdict
														
 
															 import socket
														
 
															 import random
														
 
															 import platform
														
@@ -97,8 +98,6 @@ def terminal_link(uri, label=None):
 
															 T = TypeVar("T")
														
 
															 K = TypeVar("K")
														
 
															-
														
 
															-
														
 
															 class AsyncCallback(Generic[T]):
														
 
															   def __init__(self) -> None:
														
 
															     self.condition: asyncio.Condition = asyncio.Condition()
														
@@ -147,3 +146,23 @@ class AsyncCallbackSystem(Generic[K, T]):
 
															   def trigger_all(self, *args: T) -> None:
														
 
															     for callback in self.callbacks.values():
														
 
															       callback.set(*args)
														
 
															+
														
 
															+
														
 
															+K = TypeVar('K', bound=str)
														
 
															+V = TypeVar('V')
														
 
															+class PrefixDict(Generic[K, V]):
														
 
															+    def __init__(self):
														
 
															+        self.items: Dict[K, V] = {}
														
 
															+
														
 
															+    def add(self, key: K, value: V) -> None:
														
 
															+        self.items[key] = value
														
 
															+
														
 
															+    def find_prefix(self, argument: str) -> List[Tuple[K, V]]:
														
 
															+        return [(key, value) for key, value in self.items.items() if argument.startswith(key)]
														
 
															+
														
 
															+    def find_longest_prefix(self, argument: str) -> Optional[Tuple[K, V]]:
														
 
															+        matches = self.find_prefix(argument)
														
 
															+        if len(matches) == 0:
														
 
															+            return None
														
 
															+
														
 
															+        return max(matches, key=lambda x: len(x[0]))