josh 8 luni în urmă
părinte
commit
913fdcc661
4 fișiere modificate, cu 5 adăugări și 5 ștergeri
  1. 1 1
      exo/api/chatgpt_api.py
  2. 2 2
      exo/helpers.py
  3. 1 1
      exo/inference/mlx/sharded_utils.py
  4. 1 1
      setup.py

+ 1 - 1
exo/api/chatgpt_api.py

@@ -189,7 +189,7 @@ class ChatGPTAPI:
     response = web.json_response({"detail": "Quit signal received"}, status=200)
     await response.prepare(request)
     await response.write_eof()
-    await shutdown(signal.SIGINT, asyncio.get_event_loop(), self.node)
+    await shutdown(signal.SIGINT, asyncio.get_event_loop(), self.node.server)
 
   async def timeout_middleware(self, app, handler):
     async def middleware(request):

+ 2 - 2
exo/helpers.py

@@ -237,7 +237,7 @@ def get_all_ip_addresses():
     return ["localhost"]
 
 
-async def shutdown(signal, loop, node):
+async def shutdown(signal, loop, server):
   """Gracefully shutdown the server and close the asyncio loop."""
   print(f"Received exit signal {signal.name}...")
   print("Thank you for using exo.")
@@ -246,7 +246,7 @@ async def shutdown(signal, loop, node):
   [task.cancel() for task in server_tasks]
   print(f"Cancelling {len(server_tasks)} outstanding tasks")
   await asyncio.gather(*server_tasks, return_exceptions=True)
-  await node.server.stop()
+  await server.stop()
 
 
 def is_frozen():

+ 1 - 1
exo/inference/mlx/sharded_utils.py

@@ -184,7 +184,7 @@ async def load_shard(
     processor.encode = processor.tokenizer.encode
     return model, processor
   else:
-    tokenizer = load_tokenizer(model_path, tokenizer_config)
+    tokenizer = await resolve_tokenizer(model_path)
     return model, tokenizer
 
 

+ 1 - 1
setup.py

@@ -13,7 +13,7 @@ install_requires = [
   "Jinja2==3.1.4",
   "netifaces==0.11.0",
   "numpy==2.0.0",
-  "nuitka==2.4.10",
+  "nuitka==2.4.11",
   "nvidia-ml-py==12.560.30",
   "pillow==10.4.0",
   "prometheus-client==0.20.0",