1
0

chatgpt_api.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. import uuid
  2. import time
  3. import asyncio
  4. import json
  5. import os
  6. from pathlib import Path
  7. from transformers import AutoTokenizer
  8. from typing import List, Literal, Union, Dict, Optional
  9. from aiohttp import web
  10. import aiohttp_cors
  11. import traceback
  12. import signal
  13. from exo import DEBUG, VERSION
  14. from exo.download.download_progress import RepoProgressEvent
  15. from exo.helpers import PrefixDict, shutdown
  16. from exo.inference.tokenizers import resolve_tokenizer
  17. from exo.orchestration import Node
  18. from exo.models import build_base_shard, model_cards, get_repo, pretty_name
  19. from typing import Callable, Optional
  20. from exo.download.hf.hf_shard_download import HFShardDownloader
  21. import shutil
  22. from exo.download.hf.hf_helpers import get_hf_home, get_repo_root
  23. from exo.apputil import create_animation_mp4
  24. class Message:
  25. def __init__(self, role: str, content: Union[str, List[Dict[str, Union[str, Dict[str, str]]]]], tools: Optional[List[Dict]] = None):
  26. self.role = role
  27. self.content = content
  28. self.tools = tools
  29. def to_dict(self):
  30. data = {"role": self.role, "content": self.content}
  31. if self.tools:
  32. data["tools"] = self.tools
  33. return data
  34. class ChatCompletionRequest:
  35. def __init__(self, model: str, messages: List[Message], temperature: float, tools: Optional[List[Dict]] = None):
  36. self.model = model
  37. self.messages = messages
  38. self.temperature = temperature
  39. self.tools = tools
  40. def to_dict(self):
  41. return {"model": self.model, "messages": [message.to_dict() for message in self.messages], "temperature": self.temperature, "tools": self.tools}
  42. def generate_completion(
  43. chat_request: ChatCompletionRequest,
  44. tokenizer,
  45. prompt: str,
  46. request_id: str,
  47. tokens: List[int],
  48. stream: bool,
  49. finish_reason: Union[Literal["length", "stop"], None],
  50. object_type: Literal["chat.completion", "text_completion"],
  51. ) -> dict:
  52. completion = {
  53. "id": f"chatcmpl-{request_id}",
  54. "object": object_type,
  55. "created": int(time.time()),
  56. "model": chat_request.model,
  57. "system_fingerprint": f"exo_{VERSION}",
  58. "choices": [{
  59. "index": 0,
  60. "message": {"role": "assistant", "content": tokenizer.decode(tokens)},
  61. "logprobs": None,
  62. "finish_reason": finish_reason,
  63. }],
  64. }
  65. if not stream:
  66. completion["usage"] = {
  67. "prompt_tokens": len(tokenizer.encode(prompt)),
  68. "completion_tokens": len(tokens),
  69. "total_tokens": len(tokenizer.encode(prompt)) + len(tokens),
  70. }
  71. choice = completion["choices"][0]
  72. if object_type.startswith("chat.completion"):
  73. key_name = "delta" if stream else "message"
  74. choice[key_name] = {"role": "assistant", "content": tokenizer.decode(tokens)}
  75. elif object_type == "text_completion":
  76. choice["text"] = tokenizer.decode(tokens)
  77. else:
  78. ValueError(f"Unsupported response type: {object_type}")
  79. return completion
  80. def remap_messages(messages: List[Message]) -> List[Message]:
  81. remapped_messages = []
  82. last_image = None
  83. for message in messages:
  84. if not isinstance(message.content, list):
  85. remapped_messages.append(message)
  86. continue
  87. remapped_content = []
  88. for content in message.content:
  89. if isinstance(content, dict):
  90. if content.get("type") in ["image_url", "image"]:
  91. image_url = content.get("image_url", {}).get("url") or content.get("image")
  92. if image_url:
  93. last_image = {"type": "image", "image": image_url}
  94. remapped_content.append({"type": "text", "text": "[An image was uploaded but is not displayed here]"})
  95. else:
  96. remapped_content.append(content)
  97. else:
  98. remapped_content.append(content)
  99. remapped_messages.append(Message(role=message.role, content=remapped_content))
  100. if last_image:
  101. # Replace the last image placeholder with the actual image content
  102. for message in reversed(remapped_messages):
  103. for i, content in enumerate(message.content):
  104. if isinstance(content, dict):
  105. if content.get("type") == "text" and content.get("text") == "[An image was uploaded but is not displayed here]":
  106. message.content[i] = last_image
  107. return remapped_messages
  108. return remapped_messages
  109. def build_prompt(tokenizer, _messages: List[Message], tools: Optional[List[Dict]] = None):
  110. messages = remap_messages(_messages)
  111. chat_template_args = {
  112. "conversation": [m.to_dict() for m in messages],
  113. "tokenize": False,
  114. "add_generation_prompt": True
  115. }
  116. if tools: chat_template_args["tools"] = tools
  117. prompt = tokenizer.apply_chat_template(**chat_template_args)
  118. print(f"!!! Prompt: {prompt}")
  119. return prompt
  120. def parse_message(data: dict):
  121. if "role" not in data or "content" not in data:
  122. raise ValueError(f"Invalid message: {data}. Must have 'role' and 'content'")
  123. return Message(data["role"], data["content"], data.get("tools"))
  124. def parse_chat_request(data: dict, default_model: str):
  125. return ChatCompletionRequest(
  126. data.get("model", default_model),
  127. [parse_message(msg) for msg in data["messages"]],
  128. data.get("temperature", 0.0),
  129. data.get("tools", None),
  130. )
  131. class PromptSession:
  132. def __init__(self, request_id: str, timestamp: int, prompt: str):
  133. self.request_id = request_id
  134. self.timestamp = timestamp
  135. self.prompt = prompt
  136. class ChatGPTAPI:
  137. def __init__(self, node: Node, inference_engine_classname: str, response_timeout: int = 90, on_chat_completion_request: Callable[[str, ChatCompletionRequest, str], None] = None, default_model: Optional[str] = None):
  138. self.node = node
  139. self.inference_engine_classname = inference_engine_classname
  140. self.response_timeout = response_timeout
  141. self.on_chat_completion_request = on_chat_completion_request
  142. self.app = web.Application(client_max_size=100*1024*1024) # 100MB to support image upload
  143. self.prompts: PrefixDict[str, PromptSession] = PrefixDict()
  144. self.prev_token_lens: Dict[str, int] = {}
  145. self.stream_tasks: Dict[str, asyncio.Task] = {}
  146. self.default_model = default_model or "llama-3.2-1b"
  147. cors = aiohttp_cors.setup(self.app)
  148. cors_options = aiohttp_cors.ResourceOptions(
  149. allow_credentials=True,
  150. expose_headers="*",
  151. allow_headers="*",
  152. allow_methods="*",
  153. )
  154. cors.add(self.app.router.add_get("/models", self.handle_get_models), {"*": cors_options})
  155. cors.add(self.app.router.add_get("/v1/models", self.handle_get_models), {"*": cors_options})
  156. cors.add(self.app.router.add_post("/chat/token/encode", self.handle_post_chat_token_encode), {"*": cors_options})
  157. cors.add(self.app.router.add_post("/v1/chat/token/encode", self.handle_post_chat_token_encode), {"*": cors_options})
  158. cors.add(self.app.router.add_post("/chat/completions", self.handle_post_chat_completions), {"*": cors_options})
  159. cors.add(self.app.router.add_post("/v1/chat/completions", self.handle_post_chat_completions), {"*": cors_options})
  160. cors.add(self.app.router.add_get("/v1/download/progress", self.handle_get_download_progress), {"*": cors_options})
  161. cors.add(self.app.router.add_get("/modelpool", self.handle_model_support), {"*": cors_options})
  162. cors.add(self.app.router.add_get("/healthcheck", self.handle_healthcheck), {"*": cors_options})
  163. cors.add(self.app.router.add_post("/quit", self.handle_quit), {"*": cors_options})
  164. cors.add(self.app.router.add_delete("/models/{model_name}", self.handle_delete_model), {"*": cors_options})
  165. cors.add(self.app.router.add_get("/initial_models", self.handle_get_initial_models), {"*": cors_options})
  166. cors.add(self.app.router.add_post("/create_animation", self.handle_create_animation), {"*": cors_options})
  167. cors.add(self.app.router.add_post("/download", self.handle_post_download), {"*": cors_options})
  168. cors.add(self.app.router.add_get("/topology", self.handle_get_topology), {"*": cors_options})
  169. if "__compiled__" not in globals():
  170. self.static_dir = Path(__file__).parent.parent/"tinychat"
  171. self.app.router.add_get("/", self.handle_root)
  172. self.app.router.add_static("/", self.static_dir, name="static")
  173. self.app.middlewares.append(self.timeout_middleware)
  174. self.app.middlewares.append(self.log_request)
  175. async def handle_quit(self, request):
  176. if DEBUG>=1: print("Received quit signal")
  177. response = web.json_response({"detail": "Quit signal received"}, status=200)
  178. await response.prepare(request)
  179. await response.write_eof()
  180. await shutdown(signal.SIGINT, asyncio.get_event_loop(), self.node.server)
  181. async def timeout_middleware(self, app, handler):
  182. async def middleware(request):
  183. try:
  184. return await asyncio.wait_for(handler(request), timeout=self.response_timeout)
  185. except asyncio.TimeoutError:
  186. return web.json_response({"detail": "Request timed out"}, status=408)
  187. return middleware
  188. async def log_request(self, app, handler):
  189. async def middleware(request):
  190. if DEBUG >= 2: print(f"Received request: {request.method} {request.path}")
  191. return await handler(request)
  192. return middleware
  193. async def handle_root(self, request):
  194. return web.FileResponse(self.static_dir/"index.html")
  195. async def handle_healthcheck(self, request):
  196. return web.json_response({"status": "ok"})
  197. async def handle_model_support(self, request):
  198. try:
  199. response = web.StreamResponse(
  200. status=200,
  201. reason='OK',
  202. headers={
  203. 'Content-Type': 'text/event-stream',
  204. 'Cache-Control': 'no-cache',
  205. 'Connection': 'keep-alive',
  206. }
  207. )
  208. await response.prepare(request)
  209. for model_name, pretty in pretty_name.items():
  210. if model_name in model_cards:
  211. model_info = model_cards[model_name]
  212. if self.inference_engine_classname in model_info.get("repo", {}):
  213. shard = build_base_shard(model_name, self.inference_engine_classname)
  214. if shard:
  215. downloader = HFShardDownloader(quick_check=True)
  216. downloader.current_shard = shard
  217. downloader.current_repo_id = get_repo(shard.model_id, self.inference_engine_classname)
  218. status = await downloader.get_shard_download_status()
  219. download_percentage = status.get("overall") if status else None
  220. total_size = status.get("total_size") if status else None
  221. total_downloaded = status.get("total_downloaded") if status else False
  222. model_data = {
  223. model_name: {
  224. "name": pretty,
  225. "downloaded": download_percentage == 100 if download_percentage is not None else False,
  226. "download_percentage": download_percentage,
  227. "total_size": total_size,
  228. "total_downloaded": total_downloaded
  229. }
  230. }
  231. await response.write(f"data: {json.dumps(model_data)}\n\n".encode())
  232. await response.write(b"data: [DONE]\n\n")
  233. return response
  234. except Exception as e:
  235. print(f"Error in handle_model_support: {str(e)}")
  236. traceback.print_exc()
  237. return web.json_response(
  238. {"detail": f"Server error: {str(e)}"},
  239. status=500
  240. )
  241. async def handle_get_models(self, request):
  242. return web.json_response([{"id": model_name, "object": "model", "owned_by": "exo", "ready": True} for model_name, _ in model_cards.items()])
  243. async def handle_post_chat_token_encode(self, request):
  244. data = await request.json()
  245. model = data.get("model", self.default_model)
  246. if model and model.startswith("gpt-"): # Handle gpt- model requests
  247. model = self.default_model
  248. if not model or model not in model_cards:
  249. if DEBUG >= 1: print(f"Invalid model: {model}. Supported: {list(model_cards.keys())}. Defaulting to {self.default_model}")
  250. model = self.default_model
  251. shard = build_base_shard(model, self.inference_engine_classname)
  252. messages = [parse_message(msg) for msg in data.get("messages", [])]
  253. tokenizer = await resolve_tokenizer(get_repo(shard.model_id, self.inference_engine_classname))
  254. prompt = build_prompt(tokenizer, messages, data.get("tools", None))
  255. tokens = tokenizer.encode(prompt)
  256. return web.json_response({
  257. "length": len(prompt),
  258. "num_tokens": len(tokens),
  259. "encoded_tokens": tokens,
  260. "encoded_prompt": prompt,
  261. })
  262. async def handle_get_download_progress(self, request):
  263. progress_data = {}
  264. for node_id, progress_event in self.node.node_download_progress.items():
  265. if isinstance(progress_event, RepoProgressEvent):
  266. progress_data[node_id] = progress_event.to_dict()
  267. else:
  268. print(f"Unknown progress event type: {type(progress_event)}. {progress_event}")
  269. return web.json_response(progress_data)
  270. async def handle_post_chat_completions(self, request):
  271. data = await request.json()
  272. if DEBUG >= 2: print(f"Handling chat completions request from {request.remote}: {data}")
  273. stream = data.get("stream", False)
  274. chat_request = parse_chat_request(data, self.default_model)
  275. if chat_request.model and chat_request.model.startswith("gpt-"): # to be compatible with ChatGPT tools, point all gpt- model requests to default model
  276. chat_request.model = self.default_model
  277. if not chat_request.model or chat_request.model not in model_cards:
  278. if DEBUG >= 1: print(f"Invalid model: {chat_request.model}. Supported: {list(model_cards.keys())}. Defaulting to {self.default_model}")
  279. chat_request.model = self.default_model
  280. shard = build_base_shard(chat_request.model, self.inference_engine_classname)
  281. if not shard:
  282. supported_models = [model for model, info in model_cards.items() if self.inference_engine_classname in info.get("repo", {})]
  283. return web.json_response(
  284. {"detail": f"Unsupported model: {chat_request.model} with inference engine {self.inference_engine_classname}. Supported models for this engine: {supported_models}"},
  285. status=400,
  286. )
  287. tokenizer = await resolve_tokenizer(get_repo(shard.model_id, self.inference_engine_classname))
  288. if DEBUG >= 4: print(f"Resolved tokenizer: {tokenizer}")
  289. prompt = build_prompt(tokenizer, chat_request.messages, chat_request.tools)
  290. request_id = str(uuid.uuid4())
  291. if self.on_chat_completion_request:
  292. try:
  293. self.on_chat_completion_request(request_id, chat_request, prompt)
  294. except Exception as e:
  295. if DEBUG >= 2: traceback.print_exc()
  296. # request_id = None
  297. # match = self.prompts.find_longest_prefix(prompt)
  298. # if match and len(prompt) > len(match[1].prompt):
  299. # if DEBUG >= 2:
  300. # print(f"Prompt for request starts with previous prompt {len(match[1].prompt)} of {len(prompt)}: {match[1].prompt}")
  301. # request_id = match[1].request_id
  302. # self.prompts.add(prompt, PromptSession(request_id=request_id, timestamp=int(time.time()), prompt=prompt))
  303. # # remove the matching prefix from the prompt
  304. # prompt = prompt[len(match[1].prompt):]
  305. # else:
  306. # request_id = str(uuid.uuid4())
  307. # self.prompts.add(prompt, PromptSession(request_id=request_id, timestamp=int(time.time()), prompt=prompt))
  308. callback_id = f"chatgpt-api-wait-response-{request_id}"
  309. callback = self.node.on_token.register(callback_id)
  310. if DEBUG >= 2: print(f"Sending prompt from ChatGPT api {request_id=} {shard=} {prompt=}")
  311. try:
  312. await asyncio.wait_for(asyncio.shield(asyncio.create_task(self.node.process_prompt(shard, prompt, request_id=request_id))), timeout=self.response_timeout)
  313. if DEBUG >= 2: print(f"Waiting for response to finish. timeout={self.response_timeout}s")
  314. if stream:
  315. response = web.StreamResponse(
  316. status=200,
  317. reason="OK",
  318. headers={
  319. "Content-Type": "text/event-stream",
  320. "Cache-Control": "no-cache",
  321. },
  322. )
  323. await response.prepare(request)
  324. async def stream_result(_request_id: str, tokens: List[int], is_finished: bool):
  325. prev_last_tokens_len = self.prev_token_lens.get(_request_id, 0)
  326. self.prev_token_lens[_request_id] = max(prev_last_tokens_len, len(tokens))
  327. new_tokens = tokens[prev_last_tokens_len:]
  328. finish_reason = None
  329. eos_token_id = tokenizer.special_tokens_map.get("eos_token_id") if hasattr(tokenizer, "_tokenizer") and isinstance(tokenizer._tokenizer,
  330. AutoTokenizer) else getattr(tokenizer, "eos_token_id", None)
  331. if len(new_tokens) > 0 and new_tokens[-1] == eos_token_id:
  332. new_tokens = new_tokens[:-1]
  333. if is_finished:
  334. finish_reason = "stop"
  335. if is_finished and not finish_reason:
  336. finish_reason = "length"
  337. completion = generate_completion(
  338. chat_request,
  339. tokenizer,
  340. prompt,
  341. request_id,
  342. new_tokens,
  343. stream,
  344. finish_reason,
  345. "chat.completion",
  346. )
  347. if DEBUG >= 2: print(f"Streaming completion: {completion}")
  348. try:
  349. await response.write(f"data: {json.dumps(completion)}\n\n".encode())
  350. except Exception as e:
  351. if DEBUG >= 2: print(f"Error streaming completion: {e}")
  352. if DEBUG >= 2: traceback.print_exc()
  353. def on_result(_request_id: str, tokens: List[int], is_finished: bool):
  354. if _request_id == request_id: self.stream_tasks[_request_id] = asyncio.create_task(stream_result(_request_id, tokens, is_finished))
  355. return _request_id == request_id and is_finished
  356. _, tokens, _ = await callback.wait(on_result, timeout=self.response_timeout)
  357. if request_id in self.stream_tasks: # in case there is still a stream task running, wait for it to complete
  358. if DEBUG >= 2: print("Pending stream task. Waiting for stream task to complete.")
  359. try:
  360. await asyncio.wait_for(self.stream_tasks[request_id], timeout=30)
  361. except asyncio.TimeoutError:
  362. print("WARNING: Stream task timed out. This should not happen.")
  363. await response.write_eof()
  364. return response
  365. else:
  366. _, tokens, _ = await callback.wait(
  367. lambda _request_id, tokens, is_finished: _request_id == request_id and is_finished,
  368. timeout=self.response_timeout,
  369. )
  370. finish_reason = "length"
  371. eos_token_id = tokenizer.special_tokens_map.get("eos_token_id") if isinstance(getattr(tokenizer, "_tokenizer", None), AutoTokenizer) else tokenizer.eos_token_id
  372. if DEBUG >= 2: print(f"Checking if end of tokens result {tokens[-1]=} is {eos_token_id=}")
  373. if tokens[-1] == eos_token_id:
  374. tokens = tokens[:-1]
  375. finish_reason = "stop"
  376. return web.json_response(generate_completion(chat_request, tokenizer, prompt, request_id, tokens, stream, finish_reason, "chat.completion"))
  377. except asyncio.TimeoutError:
  378. return web.json_response({"detail": "Response generation timed out"}, status=408)
  379. except Exception as e:
  380. if DEBUG >= 2: traceback.print_exc()
  381. return web.json_response({"detail": f"Error processing prompt (see logs with DEBUG>=2): {str(e)}"}, status=500)
  382. finally:
  383. deregistered_callback = self.node.on_token.deregister(callback_id)
  384. if DEBUG >= 2: print(f"Deregister {callback_id=} {deregistered_callback=}")
  385. async def handle_delete_model(self, request):
  386. try:
  387. model_name = request.match_info.get('model_name')
  388. if DEBUG >= 2: print(f"Attempting to delete model: {model_name}")
  389. if not model_name or model_name not in model_cards:
  390. return web.json_response(
  391. {"detail": f"Invalid model name: {model_name}"},
  392. status=400
  393. )
  394. shard = build_base_shard(model_name, self.inference_engine_classname)
  395. if not shard:
  396. return web.json_response(
  397. {"detail": "Could not build shard for model"},
  398. status=400
  399. )
  400. repo_id = get_repo(shard.model_id, self.inference_engine_classname)
  401. if DEBUG >= 2: print(f"Repo ID for model: {repo_id}")
  402. # Get the HF cache directory using the helper function
  403. hf_home = get_hf_home()
  404. cache_dir = get_repo_root(repo_id)
  405. if DEBUG >= 2: print(f"Looking for model files in: {cache_dir}")
  406. if os.path.exists(cache_dir):
  407. if DEBUG >= 2: print(f"Found model files at {cache_dir}, deleting...")
  408. try:
  409. shutil.rmtree(cache_dir)
  410. return web.json_response({
  411. "status": "success",
  412. "message": f"Model {model_name} deleted successfully",
  413. "path": str(cache_dir)
  414. })
  415. except Exception as e:
  416. return web.json_response({
  417. "detail": f"Failed to delete model files: {str(e)}"
  418. }, status=500)
  419. else:
  420. return web.json_response({
  421. "detail": f"Model files not found at {cache_dir}"
  422. }, status=404)
  423. except Exception as e:
  424. print(f"Error in handle_delete_model: {str(e)}")
  425. traceback.print_exc()
  426. return web.json_response({
  427. "detail": f"Server error: {str(e)}"
  428. }, status=500)
  429. async def handle_get_initial_models(self, request):
  430. model_data = {}
  431. for model_name, pretty in pretty_name.items():
  432. model_data[model_name] = {
  433. "name": pretty,
  434. "downloaded": None, # Initially unknown
  435. "download_percentage": None, # Change from 0 to null
  436. "total_size": None,
  437. "total_downloaded": None,
  438. "loading": True # Add loading state
  439. }
  440. return web.json_response(model_data)
  441. async def handle_create_animation(self, request):
  442. try:
  443. data = await request.json()
  444. replacement_image_path = data.get("replacement_image_path")
  445. device_name = data.get("device_name", "Local Device")
  446. prompt_text = data.get("prompt", "")
  447. if DEBUG >= 2: print(f"Creating animation with params: replacement_image={replacement_image_path}, device={device_name}, prompt={prompt_text}")
  448. if not replacement_image_path:
  449. return web.json_response({"error": "replacement_image_path is required"}, status=400)
  450. # Create temp directory if it doesn't exist
  451. tmp_dir = Path(tempfile.gettempdir())/"exo_animations"
  452. tmp_dir.mkdir(parents=True, exist_ok=True)
  453. # Generate unique output filename in temp directory
  454. output_filename = f"animation_{uuid.uuid4()}.mp4"
  455. output_path = str(tmp_dir/output_filename)
  456. if DEBUG >= 2: print(f"Animation temp directory: {tmp_dir}, output file: {output_path}, directory exists: {tmp_dir.exists()}, directory permissions: {oct(tmp_dir.stat().st_mode)[-3:]}")
  457. # Create the animation
  458. create_animation_mp4(
  459. replacement_image_path,
  460. output_path,
  461. device_name,
  462. prompt_text
  463. )
  464. return web.json_response({
  465. "status": "success",
  466. "output_path": output_path
  467. })
  468. except Exception as e:
  469. if DEBUG >= 2: traceback.print_exc()
  470. return web.json_response({"error": str(e)}, status=500)
  471. async def handle_post_download(self, request):
  472. try:
  473. data = await request.json()
  474. model_name = data.get("model")
  475. if not model_name: return web.json_response({"error": "model parameter is required"}, status=400)
  476. if model_name not in model_cards: return web.json_response({"error": f"Invalid model: {model_name}. Supported models: {list(model_cards.keys())}"}, status=400)
  477. shard = build_base_shard(model_name, self.inference_engine_classname)
  478. if not shard: return web.json_response({"error": f"Could not build shard for model {model_name}"}, status=400)
  479. asyncio.create_task(self.node.inference_engine.ensure_shard(shard))
  480. return web.json_response({
  481. "status": "success",
  482. "message": f"Download started for model: {model_name}"
  483. })
  484. except Exception as e:
  485. if DEBUG >= 2: traceback.print_exc()
  486. return web.json_response({"error": str(e)}, status=500)
  487. async def handle_get_topology(self, request):
  488. try:
  489. topology = self.node.current_topology
  490. if topology:
  491. return web.json_response(topology.to_json())
  492. else:
  493. return web.json_response({})
  494. except Exception as e:
  495. if DEBUG >= 2: traceback.print_exc()
  496. return web.json_response(
  497. {"detail": f"Error getting topology: {str(e)}"},
  498. status=500
  499. )
  500. async def run(self, host: str = "0.0.0.0", port: int = 52415):
  501. runner = web.AppRunner(self.app)
  502. await runner.setup()
  503. site = web.TCPSite(runner, host, port)
  504. await site.start()