chatgpt_api.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704
  1. import uuid
  2. import time
  3. import asyncio
  4. import json
  5. import os
  6. from pathlib import Path
  7. from transformers import AutoTokenizer
  8. from typing import List, Literal, Union, Dict, Optional
  9. from aiohttp import web
  10. import aiohttp_cors
  11. import traceback
  12. import signal
  13. from exo import DEBUG, VERSION
  14. from exo.download.download_progress import RepoProgressEvent
  15. from exo.helpers import PrefixDict, shutdown, get_exo_images_dir
  16. from exo.inference.tokenizers import resolve_tokenizer
  17. from exo.orchestration import Node
  18. from exo.models import build_base_shard, model_cards, get_repo, pretty_name
  19. from typing import Callable, Optional
  20. from PIL import Image
  21. import numpy as np
  22. import base64
  23. from io import BytesIO
  24. import platform
  25. if platform.system().lower() == "darwin" and platform.machine().lower() == "arm64":
  26. import mlx.core as mx
  27. else:
  28. import numpy as mx
  29. import tempfile
  30. from exo.download.hf.hf_shard_download import HFShardDownloader
  31. import shutil
  32. from exo.download.hf.hf_helpers import get_hf_home, get_repo_root
  33. from exo.apputil import create_animation_mp4
  34. from collections import defaultdict
  35. class Message:
  36. def __init__(self, role: str, content: Union[str, List[Dict[str, Union[str, Dict[str, str]]]]], tools: Optional[List[Dict]] = None):
  37. self.role = role
  38. self.content = content
  39. self.tools = tools
  40. def to_dict(self):
  41. data = {"role": self.role, "content": self.content}
  42. if self.tools:
  43. data["tools"] = self.tools
  44. return data
  45. class ChatCompletionRequest:
  46. def __init__(self, model: str, messages: List[Message], temperature: float, tools: Optional[List[Dict]] = None):
  47. self.model = model
  48. self.messages = messages
  49. self.temperature = temperature
  50. self.tools = tools
  51. def to_dict(self):
  52. return {"model": self.model, "messages": [message.to_dict() for message in self.messages], "temperature": self.temperature, "tools": self.tools}
  53. def generate_completion(
  54. chat_request: ChatCompletionRequest,
  55. tokenizer,
  56. prompt: str,
  57. request_id: str,
  58. tokens: List[int],
  59. stream: bool,
  60. finish_reason: Union[Literal["length", "stop"], None],
  61. object_type: Literal["chat.completion", "text_completion"],
  62. ) -> dict:
  63. completion = {
  64. "id": f"chatcmpl-{request_id}",
  65. "object": object_type,
  66. "created": int(time.time()),
  67. "model": chat_request.model,
  68. "system_fingerprint": f"exo_{VERSION}",
  69. "choices": [{
  70. "index": 0,
  71. "message": {"role": "assistant", "content": tokenizer.decode(tokens)},
  72. "logprobs": None,
  73. "finish_reason": finish_reason,
  74. }],
  75. }
  76. if not stream:
  77. completion["usage"] = {
  78. "prompt_tokens": len(tokenizer.encode(prompt)),
  79. "completion_tokens": len(tokens),
  80. "total_tokens": len(tokenizer.encode(prompt)) + len(tokens),
  81. }
  82. choice = completion["choices"][0]
  83. if object_type.startswith("chat.completion"):
  84. key_name = "delta" if stream else "message"
  85. choice[key_name] = {"role": "assistant", "content": tokenizer.decode(tokens)}
  86. elif object_type == "text_completion":
  87. choice["text"] = tokenizer.decode(tokens)
  88. else:
  89. ValueError(f"Unsupported response type: {object_type}")
  90. return completion
  91. def remap_messages(messages: List[Message]) -> List[Message]:
  92. remapped_messages = []
  93. last_image = None
  94. for message in messages:
  95. if not isinstance(message.content, list):
  96. remapped_messages.append(message)
  97. continue
  98. remapped_content = []
  99. for content in message.content:
  100. if isinstance(content, dict):
  101. if content.get("type") in ["image_url", "image"]:
  102. image_url = content.get("image_url", {}).get("url") or content.get("image")
  103. if image_url:
  104. last_image = {"type": "image", "image": image_url}
  105. remapped_content.append({"type": "text", "text": "[An image was uploaded but is not displayed here]"})
  106. else:
  107. remapped_content.append(content)
  108. else:
  109. remapped_content.append(content)
  110. remapped_messages.append(Message(role=message.role, content=remapped_content))
  111. if last_image:
  112. # Replace the last image placeholder with the actual image content
  113. for message in reversed(remapped_messages):
  114. for i, content in enumerate(message.content):
  115. if isinstance(content, dict):
  116. if content.get("type") == "text" and content.get("text") == "[An image was uploaded but is not displayed here]":
  117. message.content[i] = last_image
  118. return remapped_messages
  119. return remapped_messages
  120. def build_prompt(tokenizer, _messages: List[Message], tools: Optional[List[Dict]] = None):
  121. messages = remap_messages(_messages)
  122. chat_template_args = {"conversation": [m.to_dict() for m in messages], "tokenize": False, "add_generation_prompt": True}
  123. if tools:
  124. chat_template_args["tools"] = tools
  125. try:
  126. prompt = tokenizer.apply_chat_template(**chat_template_args)
  127. if DEBUG >= 3: print(f"!!! Prompt: {prompt}")
  128. return prompt
  129. except UnicodeEncodeError:
  130. # Handle Unicode encoding by ensuring everything is UTF-8
  131. chat_template_args["conversation"] = [
  132. {k: v.encode('utf-8').decode('utf-8') if isinstance(v, str) else v
  133. for k, v in m.to_dict().items()}
  134. for m in messages
  135. ]
  136. prompt = tokenizer.apply_chat_template(**chat_template_args)
  137. if DEBUG >= 3: print(f"!!! Prompt (UTF-8 encoded): {prompt}")
  138. return prompt
  139. def parse_message(data: dict):
  140. if "role" not in data or "content" not in data:
  141. raise ValueError(f"Invalid message: {data}. Must have 'role' and 'content'")
  142. return Message(data["role"], data["content"], data.get("tools"))
  143. def parse_chat_request(data: dict, default_model: str):
  144. return ChatCompletionRequest(
  145. data.get("model", default_model),
  146. [parse_message(msg) for msg in data["messages"]],
  147. data.get("temperature", 0.0),
  148. data.get("tools", None),
  149. )
  150. class PromptSession:
  151. def __init__(self, request_id: str, timestamp: int, prompt: str):
  152. self.request_id = request_id
  153. self.timestamp = timestamp
  154. self.prompt = prompt
  155. class ChatGPTAPI:
  def __init__(
    self,
    node: Node,
    inference_engine_classname: str,
    response_timeout: int = 90,
    on_chat_completion_request: Callable[[str, ChatCompletionRequest, str], None] = None,
    default_model: Optional[str] = None,
    system_prompt: Optional[str] = None
  ):
    """Create the aiohttp application, subscribe to node tokens and register all routes.

    Args:
      node: Node whose ``on_token`` callback system feeds the per-request token queues.
      inference_engine_classname: Engine name used when building shards and resolving repos.
      response_timeout: Timeout in seconds used by the timeout middleware and token waits.
      on_chat_completion_request: Optional hook called with (request_id, chat_request, prompt).
      default_model: Model used when a request names none; falls back to "llama-3.2-1b".
      system_prompt: Optional system prompt stored for chat completion requests.
    """
    self.node = node
    self.inference_engine_classname = inference_engine_classname
    self.response_timeout = response_timeout
    self.on_chat_completion_request = on_chat_completion_request
    self.system_prompt = system_prompt
    self.default_model = default_model or "llama-3.2-1b"
    self.app = web.Application(client_max_size=100*1024*1024)  # 100MB to support image upload
    self.prompts: PrefixDict[str, PromptSession] = PrefixDict()
    self.prev_token_lens: Dict[str, int] = {}
    self.stream_tasks: Dict[str, asyncio.Task] = {}
    self.token_queues = defaultdict(asyncio.Queue)

    # Get the callback system and register our handler
    self.token_callback = node.on_token.register("chatgpt-api-token-handler")
    self.token_callback.on_next(lambda _request_id, tokens, is_finished: asyncio.create_task(self.handle_tokens(_request_id, tokens, is_finished)))

    cors = aiohttp_cors.setup(self.app)
    cors_options = aiohttp_cors.ResourceOptions(
      allow_credentials=True,
      expose_headers="*",
      allow_headers="*",
      allow_methods="*",
    )

    # (router method, path, handler) in registration order.
    router = self.app.router
    api_routes = (
      (router.add_get, "/models", self.handle_get_models),
      (router.add_get, "/v1/models", self.handle_get_models),
      (router.add_post, "/chat/token/encode", self.handle_post_chat_token_encode),
      (router.add_post, "/v1/chat/token/encode", self.handle_post_chat_token_encode),
      (router.add_post, "/chat/completions", self.handle_post_chat_completions),
      (router.add_post, "/v1/chat/completions", self.handle_post_chat_completions),
      (router.add_post, "/v1/image/generations", self.handle_post_image_generations),
      (router.add_get, "/v1/download/progress", self.handle_get_download_progress),
      (router.add_get, "/modelpool", self.handle_model_support),
      (router.add_get, "/healthcheck", self.handle_healthcheck),
      (router.add_post, "/quit", self.handle_quit),
      (router.add_delete, "/models/{model_name}", self.handle_delete_model),
      (router.add_get, "/initial_models", self.handle_get_initial_models),
      (router.add_post, "/create_animation", self.handle_create_animation),
      (router.add_post, "/download", self.handle_post_download),
      (router.add_get, "/v1/topology", self.handle_get_topology),
      (router.add_get, "/topology", self.handle_get_topology),
    )
    for add_route, path, handler in api_routes:
      cors.add(add_route(path, handler), {"*": cors_options})

    # Add static routes
    if "__compiled__" not in globals():
      self.static_dir = Path(__file__).parent.parent/"tinychat"
      router.add_get("/", self.handle_root)
      router.add_static("/", self.static_dir, name="static")

    # Always add images route, regardless of compilation status
    self.images_dir = get_exo_images_dir()
    self.images_dir.mkdir(parents=True, exist_ok=True)
    router.add_static('/images/', self.images_dir, name='static_images')

    for middleware in (self.timeout_middleware, self.log_request):
      self.app.middlewares.append(middleware)
  async def handle_quit(self, request):
    """Acknowledge the quit request, flush the response, then shut the node down."""
    if DEBUG >= 1: print("Received quit signal")
    ack = web.json_response({"detail": "Quit signal received"}, status=200)
    # Send the acknowledgement fully before starting shutdown.
    await ack.prepare(request)
    await ack.write_eof()
    loop = asyncio.get_event_loop()
    await shutdown(signal.SIGINT, loop, self.node.server)
  async def timeout_middleware(self, app, handler):
    """Middleware factory: wrap ``handler`` so it answers 408 after ``response_timeout`` seconds."""
    async def with_timeout(request):
      try:
        result = await asyncio.wait_for(handler(request), timeout=self.response_timeout)
      except asyncio.TimeoutError:
        result = web.json_response({"detail": "Request timed out"}, status=408)
      return result

    return with_timeout
  async def log_request(self, app, handler):
    """Middleware factory: print method and path of each request when DEBUG >= 2."""
    async def logged(request):
      if DEBUG >= 2: print(f"Received request: {request.method} {request.path}")
      result = await handler(request)
      return result

    return logged
  async def handle_root(self, request):
    """Serve ``index.html`` from the static directory."""
    index_path = self.static_dir/"index.html"
    return web.FileResponse(index_path)
  async def handle_healthcheck(self, request):
    """Answer with a constant ``{"status": "ok"}`` JSON body."""
    payload = {"status": "ok"}
    return web.json_response(payload)
  async def handle_model_support(self, request):
    """Stream download status for each supported model as server-sent events.

    One ``data:`` event is written per model that is known in ``model_cards``,
    has a repo for the configured inference engine and yields a shard; the
    stream ends with ``data: [DONE]``. Any exception is printed and answered
    with a 500 JSON response.
    """
    try:
      sse_headers = {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
      }
      response = web.StreamResponse(status=200, reason='OK', headers=sse_headers)
      await response.prepare(request)

      async def process_model(model_name, pretty):
        # Skip models that are unknown or not offered for this inference engine.
        if model_name not in model_cards:
          return
        if self.inference_engine_classname not in model_cards[model_name].get("repo", {}):
          return
        shard = build_base_shard(model_name, self.inference_engine_classname)
        if not shard:
          return

        downloader = HFShardDownloader(quick_check=True)
        downloader.current_shard = shard
        downloader.current_repo_id = get_repo(shard.model_id, self.inference_engine_classname)
        status = await downloader.get_shard_download_status()

        if status:
          download_percentage = status.get("overall")
          total_size = status.get("total_size")
          total_downloaded = status.get("total_downloaded")
        else:
          download_percentage, total_size, total_downloaded = None, None, False

        model_data = {
          model_name: {
            "name": pretty,
            "downloaded": download_percentage is not None and download_percentage == 100,
            "download_percentage": download_percentage,
            "total_size": total_size,
            "total_downloaded": total_downloaded,
          }
        }
        event = f"data: {json.dumps(model_data)}\n\n"
        await response.write(event.encode())

      # Process all models in parallel
      pending = [process_model(model_name, pretty) for model_name, pretty in pretty_name.items()]
      await asyncio.gather(*pending)

      await response.write(b"data: [DONE]\n\n")
      return response

    except Exception as e:
      print(f"Error in handle_model_support: {str(e)}")
      traceback.print_exc()
      return web.json_response({"detail": f"Server error: {str(e)}"}, status=500)
  async def handle_get_models(self, request):
    """List every model in ``model_cards`` as a model-list JSON object."""
    models_list = []
    for model_name in model_cards.keys():
      models_list.append({"id": model_name, "object": "model", "owned_by": "exo", "ready": True})
    body = {"object": "list", "data": models_list}
    return web.json_response(body)
  async def handle_post_chat_token_encode(self, request):
    """Render the chat prompt for the posted messages and return its token encoding.

    The model falls back to ``self.default_model`` when it is missing, starts
    with ``gpt-`` or is not in ``model_cards``. Responds 400 when no shard can
    be built for the model with the configured inference engine; otherwise
    returns the prompt length in characters, the token count, the token ids
    and the prompt text.
    """
    data = await request.json()
    model = data.get("model", self.default_model)
    if model and model.startswith("gpt-"):  # Handle gpt- model requests
      model = self.default_model
    if not model or model not in model_cards:
      if DEBUG >= 1: print(f"Invalid model: {model}. Supported: {list(model_cards.keys())}. Defaulting to {self.default_model}")
      model = self.default_model

    shard = build_base_shard(model, self.inference_engine_classname)
    if not shard:
      # Same guard as the chat completions handler; without it the
      # ``shard.model_id`` access below raises AttributeError.
      supported_models = [name for name, info in model_cards.items() if self.inference_engine_classname in info.get("repo", {})]
      return web.json_response(
        {"detail": f"Unsupported model: {model} with inference engine {self.inference_engine_classname}. Supported models for this engine: {supported_models}"},
        status=400,
      )

    messages = [parse_message(msg) for msg in data.get("messages", [])]
    tokenizer = await resolve_tokenizer(get_repo(shard.model_id, self.inference_engine_classname))
    prompt = build_prompt(tokenizer, messages, data.get("tools", None))
    tokens = tokenizer.encode(prompt)
    return web.json_response({
      "length": len(prompt),
      "num_tokens": len(tokens),
      "encoded_tokens": tokens,
      "encoded_prompt": prompt,
    })
  async def handle_get_download_progress(self, request):
    """Return the node's per-node download progress as JSON.

    Entries that are not ``RepoProgressEvent`` instances are reported on
    stdout and left out of the response.
    """
    snapshot = {}
    for node_id, event in self.node.node_download_progress.items():
      if not isinstance(event, RepoProgressEvent):
        print(f"Unknown progress event type: {type(event)}. {event}")
        continue
      snapshot[node_id] = event.to_dict()
    return web.json_response(snapshot)
  async def handle_post_chat_completions(self, request):
    """Handle a chat completion request, either streamed or as one JSON response.

    The model falls back to ``self.default_model`` when it is missing, starts
    with ``gpt-`` or is not in ``model_cards``. Responds 400 when no shard can
    be built for the model. When a system prompt is configured and the
    request has no system message, one is inserted first. The prompt is then
    submitted to the node and tokens are read from this request's queue in
    ``self.token_queues``:

    * ``stream`` truthy: each queue item is written as one ``data:`` event
      until an item is marked finished.
    * otherwise: items are accumulated until finished and returned at once.

    Timeouts yield 408 and other errors 500. The request's token queue is
    removed on every exit path.
    """
    data = await request.json()
    if DEBUG >= 2: print(f"[ChatGPTAPI] Handling chat completions request from {request.remote}: {data}")
    stream = data.get("stream", False)
    chat_request = parse_chat_request(data, self.default_model)
    if chat_request.model and chat_request.model.startswith("gpt-"):  # to be compatible with ChatGPT tools, point all gpt- model requests to default model
      chat_request.model = self.default_model
    if not chat_request.model or chat_request.model not in model_cards:
      if DEBUG >= 1: print(f"[ChatGPTAPI] Invalid model: {chat_request.model}. Supported: {list(model_cards.keys())}. Defaulting to {self.default_model}")
      chat_request.model = self.default_model
    shard = build_base_shard(chat_request.model, self.inference_engine_classname)
    if not shard:
      supported_models = [model for model, info in model_cards.items() if self.inference_engine_classname in info.get("repo", {})]
      return web.json_response(
        {"detail": f"Unsupported model: {chat_request.model} with inference engine {self.inference_engine_classname}. Supported models for this engine: {supported_models}"},
        status=400,
      )

    tokenizer = await resolve_tokenizer(get_repo(shard.model_id, self.inference_engine_classname))
    if DEBUG >= 4: print(f"[ChatGPTAPI] Resolved tokenizer: {tokenizer}")

    # Add system prompt if set
    if self.system_prompt and not any(msg.role == "system" for msg in chat_request.messages):
      chat_request.messages.insert(0, Message("system", self.system_prompt))

    prompt = build_prompt(tokenizer, chat_request.messages, chat_request.tools)
    request_id = str(uuid.uuid4())
    if self.on_chat_completion_request:
      try:
        self.on_chat_completion_request(request_id, chat_request, prompt)
      except Exception:
        # The hook is best-effort: a failing hook must not fail the request.
        if DEBUG >= 2: traceback.print_exc()

    if DEBUG >= 2: print(f"[ChatGPTAPI] Processing prompt: {request_id=} {shard=} {prompt=}")

    def resolve_eos_token_id():
      # Shared by both paths: prefer the tokenizer attribute, then the special tokens map.
      eos_token_id = None
      if not eos_token_id and hasattr(tokenizer, "eos_token_id"): eos_token_id = tokenizer.eos_token_id
      if not eos_token_id and hasattr(tokenizer, "_tokenizer"): eos_token_id = tokenizer.special_tokens_map.get("eos_token_id")
      return eos_token_id

    try:
      await asyncio.wait_for(asyncio.shield(asyncio.create_task(self.node.process_prompt(shard, prompt, request_id=request_id))), timeout=self.response_timeout)

      if DEBUG >= 2: print(f"[ChatGPTAPI] Waiting for response to finish. timeout={self.response_timeout}s")

      if stream:
        response = web.StreamResponse(
          status=200,
          reason="OK",
          headers={
            "Content-Type": "text/event-stream",
            "Cache-Control": "no-cache",
          },
        )
        await response.prepare(request)

        try:
          # Stream tokens while waiting for inference to complete
          while True:
            if DEBUG >= 2: print(f"[ChatGPTAPI] Waiting for token from queue: {request_id=}")
            tokens, is_finished = await asyncio.wait_for(
              self.token_queues[request_id].get(),
              timeout=self.response_timeout
            )
            if DEBUG >= 2: print(f"[ChatGPTAPI] Got token from queue: {request_id=} {tokens=} {is_finished=}")

            eos_token_id = resolve_eos_token_id()
            # An empty chunk has no last token; treat it as "not EOS" instead of raising IndexError.
            has_tokens = len(tokens) > 0
            last_token = tokens[-1] if has_tokens else None

            finish_reason = None
            if is_finished: finish_reason = "stop" if has_tokens and last_token == eos_token_id else "length"
            if DEBUG >= 2: print(f"{eos_token_id=} tokens[-1]={last_token!r} {finish_reason=}")

            completion = generate_completion(
              chat_request,
              tokenizer,
              prompt,
              request_id,
              tokens,
              stream,
              finish_reason,
              "chat.completion",
            )

            await response.write(f"data: {json.dumps(completion)}\n\n".encode())

            if is_finished:
              break

          await response.write_eof()
          return response

        except asyncio.TimeoutError:
          if DEBUG >= 2: print(f"[ChatGPTAPI] Timeout waiting for token: {request_id=}")
          return web.json_response({"detail": "Response generation timed out"}, status=408)

        except Exception as e:
          if DEBUG >= 2:
            print(f"[ChatGPTAPI] Error processing prompt: {e}")
            traceback.print_exc()
          return web.json_response(
            {"detail": f"Error processing prompt: {str(e)}"},
            status=500
          )
      else:
        tokens = []
        while True:
          _tokens, is_finished = await asyncio.wait_for(self.token_queues[request_id].get(), timeout=self.response_timeout)
          tokens.extend(_tokens)
          if is_finished:
            break

        eos_token_id = resolve_eos_token_id()
        # Guard the empty case: the original indexed tokens[-1] unconditionally.
        has_tokens = len(tokens) > 0
        last_token = tokens[-1] if has_tokens else None
        if DEBUG >= 2: print(f"Checking if end of tokens result tokens[-1]={last_token!r} is {eos_token_id=}")
        finish_reason = "stop" if has_tokens and last_token == eos_token_id else "length"

        return web.json_response(generate_completion(chat_request, tokenizer, prompt, request_id, tokens, stream, finish_reason, "chat.completion"))
    except asyncio.TimeoutError:
      return web.json_response({"detail": "Response generation timed out"}, status=408)
    except Exception as e:
      if DEBUG >= 2: traceback.print_exc()
      return web.json_response({"detail": f"Error processing prompt (see logs with DEBUG>=2): {str(e)}"}, status=500)
    finally:
      # Clean up the queue for this request. This used to happen only on the
      # streaming path, so non-streamed requests left their queue behind.
      if request_id in self.token_queues:
        if DEBUG >= 2: print(f"[ChatGPTAPI] Cleaning up token queue: {request_id=}")
        del self.token_queues[request_id]
  async def handle_post_image_generations(self, request):
    """Handle an image generation request and stream progress and results.

    Reads ``model``, ``prompt`` and an optional ``image_url`` from the JSON
    body, responds 400 when no shard can be built for the model, then submits
    the prompt to the node and streams newline-delimited JSON objects:
    ``{"progress": ...}`` for list results and ``{"images": [...]}`` for
    numpy array results. Any exception in the outer flow yields a 500 JSON
    response.
    """
    data = await request.json()

    if DEBUG >= 2: print(f"Handling chat completions request from {request.remote}: {data}")
    # NOTE(review): ``stream`` is only used in the debug print below.
    stream = data.get("stream", False)
    model = data.get("model", "")
    prompt = data.get("prompt", "")
    image_url = data.get("image_url", "")
    if DEBUG >= 2: print(f"model: {model}, prompt: {prompt}, stream: {stream}")
    shard = build_base_shard(model, self.inference_engine_classname)
    if DEBUG >= 2: print(f"shard: {shard}")
    if not shard:
      return web.json_response({"error": f"Unsupported model: {model} with inference engine {self.inference_engine_classname}"}, status=400)

    request_id = str(uuid.uuid4())
    callback_id = f"chatgpt-api-wait-response-{request_id}"
    # NOTE(review): no deregistration of this callback is visible in this
    # method — confirm whether the callback system cleans it up.
    callback = self.node.on_token.register(callback_id)
    try:
      # An optional input image is decoded into an array and passed along as inference state.
      if image_url != "" and image_url != None:
        img = self.base64_decode(image_url)
      else:
        img = None
      await asyncio.wait_for(asyncio.shield(asyncio.create_task(self.node.process_prompt(shard, prompt, request_id=request_id, inference_state={"image": img}))), timeout=self.response_timeout)

      response = web.StreamResponse(status=200, reason='OK', headers={
        'Content-Type': 'application/octet-stream',
        "Cache-Control": "no-cache",
      })
      await response.prepare(request)

      def get_progress_bar(current_step, total_steps, bar_length=50):
        """Return a text progress bar for ``current_step`` of ``total_steps``."""
        # Calculate the percentage of completion
        # NOTE(review): divides by total_steps, so total_steps == 0 raises ZeroDivisionError.
        percent = float(current_step)/total_steps
        # Calculate the number of dashes to display, ending in an arrow head
        arrow = '-'*int(round(percent*bar_length) - 1) + '>'
        spaces = ' '*(bar_length - len(arrow))

        # Create the progress bar string
        progress_bar = f'Progress: [{arrow}{spaces}] {int(percent * 100)}% ({current_step}/{total_steps})'
        return progress_bar

      async def stream_image(_request_id: str, result, is_finished: bool):
        """Write one result to the response: a progress line or a saved image URL."""
        if isinstance(result, list):
          # List results are read as [current_step, total_steps].
          await response.write(json.dumps({'progress': get_progress_bar((result[0]), (result[1]))}).encode('utf-8') + b'\n')

        elif isinstance(result, np.ndarray):
          try:
            im = Image.fromarray(np.array(result))
            # Save the image to a file
            image_filename = f"{_request_id}.png"
            image_path = self.images_dir/image_filename
            im.save(image_path)
            # Get URL for the saved image
            try:
              image_url = request.app.router['static_images'].url_for(filename=image_filename)
              base_url = f"{request.scheme}://{request.host}"
              full_image_url = base_url + str(image_url)

              await response.write(json.dumps({'images': [{'url': str(full_image_url), 'content_type': 'image/png'}]}).encode('utf-8') + b'\n')
            except KeyError as e:
              if DEBUG >= 2: print(f"Error getting image URL: {e}")
              # Fallback to direct file path if URL generation fails
              await response.write(json.dumps({'images': [{'url': str(image_path), 'content_type': 'image/png'}]}).encode('utf-8') + b'\n')
            if is_finished:
              await response.write_eof()
          except Exception as e:
            if DEBUG >= 2: print(f"Error processing image: {e}")
            if DEBUG >= 2: traceback.print_exc()
            await response.write(json.dumps({'error': str(e)}).encode('utf-8') + b'\n')

      # Most recently scheduled stream_image task; awaited after the wait below.
      stream_task = None

      def on_result(_request_id: str, result, is_finished: bool):
        """Schedule streaming of ``result``; return True once this request is finished."""
        nonlocal stream_task
        # NOTE(review): a task is scheduled for every result without checking
        # that _request_id matches this request, and only the latest task is
        # kept for awaiting.
        stream_task = asyncio.create_task(stream_image(_request_id, result, is_finished))
        return _request_id == request_id and is_finished

      await callback.wait(on_result, timeout=self.response_timeout*10)

      if stream_task:
        # Wait for the stream task to complete before returning
        await stream_task

      return response

    except Exception as e:
      if DEBUG >= 2: traceback.print_exc()
      return web.json_response({"detail": f"Error processing prompt (see logs with DEBUG>=2): {str(e)}"}, status=500)
  async def handle_delete_model(self, request):
    """Delete the local files of the model named in the URL.

    Responds 400 for an unknown model or when no shard can be built, 404 when
    the repo directory does not exist, 500 when deletion or anything else
    fails, and a success object with the deleted path otherwise.
    """
    try:
      model_name = request.match_info.get('model_name')
      if DEBUG >= 2: print(f"Attempting to delete model: {model_name}")

      is_known_model = bool(model_name) and model_name in model_cards
      if not is_known_model:
        return web.json_response({"detail": f"Invalid model name: {model_name}"}, status=400)

      shard = build_base_shard(model_name, self.inference_engine_classname)
      if not shard:
        return web.json_response({"detail": "Could not build shard for model"}, status=400)

      repo_id = get_repo(shard.model_id, self.inference_engine_classname)
      if DEBUG >= 2: print(f"Repo ID for model: {repo_id}")

      # Get the HF cache directory using the helper function
      hf_home = get_hf_home()
      cache_dir = get_repo_root(repo_id)
      if DEBUG >= 2: print(f"Looking for model files in: {cache_dir}")

      if not os.path.exists(cache_dir):
        return web.json_response({"detail": f"Model files not found at {cache_dir}"}, status=404)

      if DEBUG >= 2: print(f"Found model files at {cache_dir}, deleting...")
      try:
        shutil.rmtree(cache_dir)
        success_body = {"status": "success", "message": f"Model {model_name} deleted successfully", "path": str(cache_dir)}
        return web.json_response(success_body)
      except Exception as e:
        return web.json_response({"detail": f"Failed to delete model files: {str(e)}"}, status=500)

    except Exception as e:
      print(f"Error in handle_delete_model: {str(e)}")
      traceback.print_exc()
      return web.json_response({"detail": f"Server error: {str(e)}"}, status=500)
  async def handle_get_initial_models(self, request):
    """Return a placeholder entry per model: only the name is known, state is loading."""
    model_data = {
      model_name: {
        "name": pretty,
        "downloaded": None,  # Initially unknown
        "download_percentage": None,  # null rather than 0 while unknown
        "total_size": None,
        "total_downloaded": None,
        "loading": True,  # Loading state
      }
      for model_name, pretty in pretty_name.items()
    }
    return web.json_response(model_data)
  async def handle_create_animation(self, request):
    """Create an animation MP4 in the temp directory and return its path.

    Requires ``replacement_image_path`` in the JSON body (400 otherwise);
    ``device_name`` defaults to "Local Device" and ``prompt`` to "". Any
    exception yields a 500 response carrying the error text.
    """
    try:
      data = await request.json()
      replacement_image_path = data.get("replacement_image_path")
      device_name = data.get("device_name", "Local Device")
      prompt_text = data.get("prompt", "")

      if DEBUG >= 2: print(f"Creating animation with params: replacement_image={replacement_image_path}, device={device_name}, prompt={prompt_text}")

      if not replacement_image_path:
        return web.json_response({"error": "replacement_image_path is required"}, status=400)

      # Create temp directory if it doesn't exist
      tmp_dir = Path(tempfile.gettempdir())/"exo_animations"
      tmp_dir.mkdir(parents=True, exist_ok=True)

      # Generate unique output filename in temp directory
      output_path = str(tmp_dir/f"animation_{uuid.uuid4()}.mp4")

      if DEBUG >= 2: print(f"Animation temp directory: {tmp_dir}, output file: {output_path}, directory exists: {tmp_dir.exists()}, directory permissions: {oct(tmp_dir.stat().st_mode)[-3:]}")

      # Create the animation
      create_animation_mp4(replacement_image_path, output_path, device_name, prompt_text)

      result = {"status": "success", "output_path": output_path}
      return web.json_response(result)

    except Exception as e:
      if DEBUG >= 2: traceback.print_exc()
      return web.json_response({"error": str(e)}, status=500)
  async def handle_post_download(self, request):
    """Start a background download of the requested model's shard.

    Responds 400 when ``model`` is missing, unknown, or no shard can be built;
    500 with the error text on any exception.
    """
    try:
      data = await request.json()
      model_name = data.get("model")

      if not model_name:
        return web.json_response({"error": "model parameter is required"}, status=400)
      if model_name not in model_cards:
        return web.json_response({"error": f"Invalid model: {model_name}. Supported models: {list(model_cards.keys())}"}, status=400)

      shard = build_base_shard(model_name, self.inference_engine_classname)
      if not shard:
        return web.json_response({"error": f"Could not build shard for model {model_name}"}, status=400)

      # Fire and forget: the response does not wait for the download.
      downloader = self.node.inference_engine.shard_downloader
      asyncio.create_task(downloader.ensure_shard(shard, self.inference_engine_classname))

      return web.json_response({"status": "success", "message": f"Download started for model: {model_name}"})
    except Exception as e:
      if DEBUG >= 2: traceback.print_exc()
      return web.json_response({"error": str(e)}, status=500)
  async def handle_get_topology(self, request):
    """Return the node's current topology as JSON, or ``{}`` when there is none."""
    try:
      topology = self.node.current_topology
      body = topology.to_json() if topology else {}
      return web.json_response(body)
    except Exception as e:
      if DEBUG >= 2: traceback.print_exc()
      return web.json_response({"detail": f"Error getting topology: {str(e)}"}, status=500)
  async def handle_tokens(self, request_id: str, tokens: List[int], is_finished: bool):
    """Enqueue a ``(tokens, is_finished)`` item on the queue for ``request_id``."""
    queue = self.token_queues[request_id]
    item = (tokens, is_finished)
    await queue.put(item)
  async def run(self, host: str = "0.0.0.0", port: int = 52415):
    """Set up the application runner and start a TCP site on ``host``:``port``."""
    app_runner = web.AppRunner(self.app)
    await app_runner.setup()
    tcp_site = web.TCPSite(app_runner, host, port)
    await tcp_site.start()
  def base64_decode(self, base64_string):
    """Decode a base64 image (optionally a ``data:image`` URL) into a normalized array.

    The image is resized down so both sides are multiples of 64, converted to
    an ``mx`` array, cut to its first three channels, rescaled with
    ``x/255*2 - 1`` and given a leading axis.
    """
    # Strip the "data:image...," prefix when a data URL was passed.
    encoded = base64_string
    if encoded.startswith('data:image'):
      encoded = encoded.split(',')[1]

    raw_bytes = base64.b64decode(encoded)
    img = Image.open(BytesIO(raw_bytes))

    # Round each side down to the nearest multiple of 64.
    W = img.width - img.width%64
    H = img.height - img.height%64
    if (W, H) != (img.width, img.height):
      if DEBUG >= 2: print(f"Warning: image shape is not divisible by 64, downsampling to {W}x{H}")
      img = img.resize((W, H), Image.NEAREST)  # use desired downsampling filter

    pixels = mx.array(np.array(img))
    pixels = (pixels[:, :, :3].astype(mx.float32)/255)*2 - 1
    return pixels[None]