@@ -2,7 +2,6 @@ import argparse
 import asyncio
 import signal
 import uuid
-from typing import List
 from exo.orchestration.standard_node import StandardNode
 from exo.networking.grpc.grpc_server import GRPCServer
 from exo.networking.grpc.grpc_discovery import GRPCDiscovery
@@ -41,11 +40,21 @@ if args.node_port is None:
   if DEBUG >= 1: print(f"Using available port: {args.node_port}")
 
 discovery = GRPCDiscovery(args.node_id, args.node_port, args.listen_port, args.broadcast_port, discovery_timeout=args.discovery_timeout)
-node = StandardNode(args.node_id, None, inference_engine, discovery, partitioning_strategy=RingMemoryWeightedPartitioningStrategy(), chatgpt_api_endpoint=f"http://localhost:{args.chatgpt_api_port}/v1/chat/completions", web_chat_url=f"http://localhost:{args.chatgpt_api_port}", disable_tui=args.disable_tui, max_generate_tokens=args.max_generate_tokens)
+node = StandardNode(
+  args.node_id,
+  None,
+  inference_engine,
+  discovery,
+  partitioning_strategy=RingMemoryWeightedPartitioningStrategy(),
+  chatgpt_api_endpoint=f"http://localhost:{args.chatgpt_api_port}/v1/chat/completions",
+  web_chat_url=f"http://localhost:{args.chatgpt_api_port}",
+  disable_tui=args.disable_tui,
+  max_generate_tokens=args.max_generate_tokens,
+)
 server = GRPCServer(node, args.node_host, args.node_port)
 node.server = server
 api = ChatGPTAPI(node, inference_engine.__class__.__name__, response_timeout_secs=args.chatgpt_api_response_timeout_secs)
-node.on_token.register("main_log").on_next(lambda _, tokens , __: print(inference_engine.tokenizer.decode(tokens) if hasattr(inference_engine, "tokenizer") else tokens))
+node.on_token.register("main_log").on_next(lambda _, tokens, __: print(inference_engine.tokenizer.decode(tokens) if hasattr(inference_engine, "tokenizer") else tokens))
 if args.prometheus_client_port:
   from exo.stats.metrics import start_metrics_server
   start_metrics_server(node, args.prometheus_client_port)