@@ -36,7 +36,6 @@ parser.add_argument("--node-port", type=int, default=None, help="Node port")
 parser.add_argument("--listen-port", type=int, default=5678, help="Listening port for discovery")
 parser.add_argument("--download-quick-check", action="store_true", help="Quick check local path for model shards download")
 parser.add_argument("--max-parallel-downloads", type=int, default=4, help="Max parallel downloads for model shards download")
-parser.add_argument("--max-caches", type=int, default=2, help="Max caches to keep in memory at once.")
 parser.add_argument("--prometheus-client-port", type=int, default=None, help="Prometheus client port")
 parser.add_argument("--broadcast-port", type=int, default=5678, help="Broadcast port for discovery")
 parser.add_argument("--discovery-module", type=str, choices=["udp", "tailscale", "manual"], default="udp", help="Discovery module to use")
@@ -65,7 +64,7 @@ shard_downloader: ShardDownloader = HFShardDownloader(quick_check=args.download_
 inference_engine_name = args.inference_engine or ("mlx" if system_info == "Apple Silicon Mac" else "tinygrad")
 print(f"Inference engine name after selection: {inference_engine_name}")
 
-inference_engine = get_inference_engine(inference_engine_name, shard_downloader, max_caches=args.max_caches)
+inference_engine = get_inference_engine(inference_engine_name, shard_downloader)
 print(f"Using inference engine: {inference_engine.__class__.__name__} with shard downloader: {shard_downloader.__class__.__name__}")
 
 if args.node_port is None: