|
@@ -22,6 +22,7 @@ parser.add_argument("--listen-port", type=int, default=5678, help="Listening por
|
|
parser.add_argument("--broadcast-port", type=int, default=5678, help="Broadcast port for discovery")
|
|
parser.add_argument("--broadcast-port", type=int, default=5678, help="Broadcast port for discovery")
|
|
parser.add_argument("--wait-for-peers", type=int, default=0, help="Number of peers to wait to connect to before starting")
|
|
parser.add_argument("--wait-for-peers", type=int, default=0, help="Number of peers to wait to connect to before starting")
|
|
parser.add_argument("--chatgpt-api-port", type=int, default=8000, help="ChatGPT API port")
|
|
parser.add_argument("--chatgpt-api-port", type=int, default=8000, help="ChatGPT API port")
|
|
|
|
+parser.add_argument("--chatgpt-api-response-timeout-secs", type=int, default=90, help="ChatGPT API response timeout in seconds")
|
|
parser.add_argument("--inference-engine", type=str, default=None, help="Inference engine to use")
|
|
parser.add_argument("--inference-engine", type=str, default=None, help="Inference engine to use")
|
|
args = parser.parse_args()
|
|
args = parser.parse_args()
|
|
|
|
|
|
@@ -57,7 +58,7 @@ discovery = GRPCDiscovery(args.node_id, args.node_port, args.listen_port, args.b
|
|
node = StandardNode(args.node_id, None, inference_engine, discovery, partitioning_strategy=RingMemoryWeightedPartitioningStrategy(), chatgpt_api_endpoint=f"http://localhost:{args.chatgpt_api_port}/v1/chat/completions", web_chat_url=f"http://localhost:{args.chatgpt_api_port}")
|
|
node = StandardNode(args.node_id, None, inference_engine, discovery, partitioning_strategy=RingMemoryWeightedPartitioningStrategy(), chatgpt_api_endpoint=f"http://localhost:{args.chatgpt_api_port}/v1/chat/completions", web_chat_url=f"http://localhost:{args.chatgpt_api_port}")
|
|
server = GRPCServer(node, args.node_host, args.node_port)
|
|
server = GRPCServer(node, args.node_host, args.node_port)
|
|
node.server = server
|
|
node.server = server
|
|
-api = ChatGPTAPI(node, inference_engine.__class__.__name__)
|
|
|
|
|
|
+api = ChatGPTAPI(node, inference_engine.__class__.__name__, response_timeout_secs=args.chatgpt_api_response_timeout_secs)
|
|
|
|
|
|
node.on_token.register("main_log").on_next(lambda _, tokens , __: print(inference_engine.tokenizer.decode(tokens) if hasattr(inference_engine, "tokenizer") else tokens))
|
|
node.on_token.register("main_log").on_next(lambda _, tokens , __: print(inference_engine.tokenizer.decode(tokens) if hasattr(inference_engine, "tokenizer") else tokens))
|
|
|
|
|