
Merge pull request #8 from cadenmackenzie/main

updates
Caden MacKenzie 8 months ago
parent
commit
bc83d1f81a

+ 28 - 4
.circleci/config.yml

@@ -27,11 +27,11 @@ commands:
             fi
 
             # Start first instance
-            HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout 900 2>&1 | tee output1.log &
+            HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout 900 --disable-tui 2>&1 | tee output1.log &
             PID1=$!
 
             # Start second instance
-            HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout 900 2>&1 | tee output2.log &
+            HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout 900 --disable-tui 2>&1 | tee output2.log &
             PID2=$!
 
             # Wait for discovery
@@ -149,9 +149,9 @@ jobs:
           name: Run discovery integration test
           command: |
             source env/bin/activate
-            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
+            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 &
             PID1=$!
-            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
+            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 &
             PID2=$!
             sleep 10
             kill $PID1 $PID2
@@ -247,6 +247,29 @@ jobs:
           prompt: "Keep responses concise. Who was the king of pop?"
           expected_output: "Michael Jackson"
 
+  measure_pip_sizes:
+    macos:
+      xcode: "16.0.0"
+    steps:
+      - checkout
+      - run:
+          name: Set up Python
+          command: |
+            brew install python@3.12
+            python3.12 -m venv env
+            source env/bin/activate
+      - run:
+          name: Install dependencies and measure sizes
+          command: |
+            source env/bin/activate
+            pip install --upgrade pip
+            pip install .
+            python ./extra/pipsize.py --json ./pipsize.json
+      - store_artifacts:
+          path: ./pipsize.json
+          destination: pip-sizes.json
+
+
 workflows:
   version: 2
   build_and_test:
@@ -257,3 +280,4 @@ workflows:
       - chatgpt_api_integration_test_tinygrad
       - chatgpt_api_integration_test_dummy
       - test_macos_m1
+      - measure_pip_sizes
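
The `--disable-tui` flag keeps both nodes headless in CI so their output lands cleanly in the log files. A rough local reproduction of the discovery check, reusing the same flags as the CI step above:

```sh
# Two headless exo nodes on one machine, mirroring the CI discovery step
DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 \
  --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 &
PID1=$!
DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 \
  --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 &
PID2=$!
sleep 10                 # give the nodes time to find each other
kill $PID1 $PID2
cat output1.log output2.log   # inspect manually for discovery of the peer node
```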

+ 4 - 4
README.md

@@ -67,10 +67,10 @@ The current recommended way to install exo is from source.
 ### Prerequisites
 
 - Python>=3.12.0 is required because of [issues with asyncio](https://github.com/exo-explore/exo/issues/5) in previous versions.
-- Linux (with NVIDIA card):
-  - NVIDIA driver (test with `nvidia-smi`)
-  - CUDA (https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#cuda-cross-platform-installation) (test with `nvcc --version`)
-  - cuDNN (https://developer.nvidia.com/cudnn-downloads) (test with [link](https://docs.nvidia.com/deeplearning/cudnn/latest/installation/linux.html#verifying-the-install-on-linux:~:text=at%20a%20time.-,Verifying%20the%20Install%20on%20Linux,Test%20passed!,-Upgrading%20From%20Older))
+- For Linux with NVIDIA GPU support (Linux-only, skip if not using Linux or NVIDIA):
+  - NVIDIA driver - verify with `nvidia-smi`
+  - CUDA toolkit - install from [NVIDIA CUDA guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#cuda-cross-platform-installation), verify with `nvcc --version`
+  - cuDNN library - download from [NVIDIA cuDNN page](https://developer.nvidia.com/cudnn-downloads), verify installation by following [these steps](https://docs.nvidia.com/deeplearning/cudnn/latest/installation/linux.html#verifying-the-install-on-linux:~:text=at%20a%20time.-,Verifying%20the%20Install%20on%20Linux,Test%20passed!,-Upgrading%20From%20Older)
 
 ### Hardware Requirements
 
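A quick way to run the verification commands named in the list above on a Linux machine with an NVIDIA card (a convenience sketch; it only covers the checks the list already mentions):

```sh
nvidia-smi        # NVIDIA driver is installed and can see the GPU
nvcc --version    # CUDA toolkit is on the PATH
# cuDNN has no single one-line check; follow the linked "Verifying the Install on Linux" steps
```
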

+ 2 - 1
exo/api/chatgpt_api.py

@@ -10,6 +10,7 @@ from aiohttp import web
 import aiohttp_cors
 import traceback
 import os
+import signal
 import sys
 from exo import DEBUG, VERSION
 from exo.download.download_progress import RepoProgressEvent
@@ -193,7 +194,7 @@ class ChatGPTAPI:
     response = web.json_response({"detail": "Quit signal received"}, status=200)
     await response.prepare(request)
     await response.write_eof()
-    await shutdown(signal.SIGINT, asyncio.get_event_loop())
+    await shutdown(signal.SIGINT, asyncio.get_event_loop(), self.node.server)
 
   async def timeout_middleware(self, app, handler):
     async def middleware(request):

+ 12 - 9
exo/download/hf/hf_helpers.py

@@ -17,7 +17,6 @@ from exo.helpers import DEBUG, is_frozen
 from exo.download.download_progress import RepoProgressEvent, RepoFileProgressEvent, RepoProgressCallback, RepoFileProgressCallback
 from exo.inference.shard import Shard
 import aiofiles
-from aiofiles import os as aios
 
 T = TypeVar("T")
 
@@ -109,16 +108,20 @@ async def move_models_to_hf(seed_dir: Union[str, Path]):
   """Move model in resources folder of app to .cache/huggingface/hub"""
   source_dir = Path(seed_dir)
   dest_dir = get_hf_home()/"hub"
-  await aios.makedirs(dest_dir, exist_ok=True)
-  async for path in source_dir.iterdir():
-    if path.is_dir() and path.startswith("models--"):
+  await aios.makedirs(dest_dir, exist_ok=True)  
+  for path in source_dir.iterdir():
+    if path.is_dir() and path.name.startswith("models--"):
       dest_path = dest_dir / path.name
-      if dest_path.exists():
-        if DEBUG>=1: print(f"skipping moving {dest_path}. File already exists")
+      if await aios.path.exists(dest_path):
+        print('Skipping moving model to .cache directory')
       else:
-        await aios.rename(str(path), str(dest_path))
-        
-
+        try:
+          await aios.rename(str(path), str(dest_path))
+        except Exception as e:
+          print(f'Error moving model to .cache: {e}')
+    
+    
+    
 async def fetch_file_list(session, repo_id, revision, path=""):
   api_url = f"{get_hf_endpoint()}/api/models/{repo_id}/tree/{revision}"
   url = f"{api_url}/{path}" if path else api_url
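
For reference, a self-contained sketch of the updated helper, assuming `aiofiles.os` is imported as `aios` and that `get_hf_home` behaves like the standard Hugging Face cache lookup (both assumptions, since neither appears in the hunk itself):

```python
import os
from pathlib import Path
from typing import Union
from aiofiles import os as aios  # async os helpers: makedirs, rename, path.exists


def get_hf_home() -> Path:
  # Assumed behavior: HF_HOME if set, else the standard ~/.cache/huggingface location
  return Path(os.environ.get("HF_HOME", Path.home()/".cache"/"huggingface"))


async def move_models_to_hf(seed_dir: Union[str, Path]):
  """Move models in the app's resources folder to .cache/huggingface/hub."""
  source_dir = Path(seed_dir)
  dest_dir = get_hf_home()/"hub"
  await aios.makedirs(dest_dir, exist_ok=True)
  for path in source_dir.iterdir():  # Path.iterdir() is synchronous, hence a plain for loop
    if path.is_dir() and path.name.startswith("models--"):
      dest_path = dest_dir/path.name
      if await aios.path.exists(dest_path):
        print('Skipping moving model to .cache directory')
      else:
        try:
          await aios.rename(str(path), str(dest_path))
        except Exception as e:
          print(f'Error moving model to .cache: {e}')
```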

+ 1 - 2
exo/helpers.py

@@ -237,7 +237,7 @@ def get_all_ip_addresses():
     return ["localhost"]
 
 
-async def shutdown(signal, loop):
+async def shutdown(signal, loop, server):
   """Gracefully shutdown the server and close the asyncio loop."""
   print(f"Received exit signal {signal.name}...")
   print("Thank you for using exo.")
@@ -247,7 +247,6 @@ async def shutdown(signal, loop):
   print(f"Cancelling {len(server_tasks)} outstanding tasks")
   await asyncio.gather(*server_tasks, return_exceptions=True)
   await server.stop()
-  loop.stop()
 
 
 def is_frozen():
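
With the extra `server` argument, the shutdown path now looks roughly like the sketch below. How `server_tasks` is collected is assumed from the surrounding context rather than shown in the hunk, and `server.stop()` is the async stop method of the node server that callers now pass in explicitly.

```python
import asyncio


async def shutdown(signal, loop, server):
  """Gracefully shutdown the server and close the asyncio loop."""
  print(f"Received exit signal {signal.name}...")
  print("Thank you for using exo.")
  # Assumed: cancel every task except the one running this coroutine
  server_tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
  for task in server_tasks:
    task.cancel()
  print(f"Cancelling {len(server_tasks)} outstanding tasks")
  await asyncio.gather(*server_tasks, return_exceptions=True)
  await server.stop()  # stop the node's server explicitly instead of calling loop.stop()
```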

+ 2 - 0
exo/inference/test_inference_engine.py

@@ -13,6 +13,7 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e
   prompt = "In a single word only, what is the last name of the current president of the USA?"
   resp_full = await inference_engine_1.infer_prompt("A", shard=Shard(model_id=model_id, start_layer=0, end_layer=n_layers - 1, n_layers=n_layers), prompt=prompt)
   token_full = await inference_engine_1.sample(resp_full)
+  token_full = token_full.reshape(1, -1)
   next_resp_full = await inference_engine_1.infer_tensor(
     "A",
     shard=Shard(model_id=model_id, start_layer=0, end_layer=n_layers - 1, n_layers=n_layers),
@@ -27,6 +28,7 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e
     input_data=resp1,
   )
   tokens2 = await inference_engine_1.sample(resp2)
+  tokens2 = tokens2.reshape(1, -1)
   resp3 = await inference_engine_1.infer_tensor(
     "B",
     shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=n_layers),
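
The `reshape(1, -1)` calls turn the flat array of sampled token ids into a batched `(1, n)` input before it is fed back through `infer_tensor`. A minimal illustration with NumPy (the actual array type returned by `sample()` may differ, but the reshape behaves the same way):

```python
import numpy as np

token = np.array([42])           # a single sampled token id, shape (1,)
batched = token.reshape(1, -1)   # shape (1, 1): batch dimension first
print(batched.shape)             # (1, 1)
```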

+ 1 - 2
exo/inference/tinygrad/inference.py

@@ -40,8 +40,7 @@ MODEL_PARAMS = {
 def build_transformer(model_path: Path, shard: Shard, model_size="8B", device=None):
   # build model
   linear = nn.Linear
-  with Context(THREEFRY=0):
-    model = Transformer(**MODEL_PARAMS[model_size]["args"], linear=linear, max_context=8192, jit=True, shard=shard)
+  model = Transformer(**MODEL_PARAMS[model_size]["args"], linear=linear, max_context=8192, jit=True, shard=shard)
 
   # load weights
   if model_path.is_dir():

+ 3 - 3
exo/inference/tinygrad/models/llama.py

@@ -225,9 +225,9 @@ class Transformer:
       h = inputs
     return h
 
-  def forward(self, x: Tensor, start_pos: Variable, cache: Optional[List[Tensor]] = None):
-    if x.shape[0:2] == (1, 1) and self.forward_jit is not None:
-      return self.forward_jit(x, Variable("start_pos", 0, self.max_context).bind(start_pos), cache=cache)
+  def forward(self, x: Tensor, start_pos: int, cache: Optional[List[Tensor]] = None):
+    if x.shape[0:2] == (1, 1) and self.forward_jit is not None and start_pos != 0:
+      return self.forward_jit(x, Variable("start_pos", 1, self.max_context).bind(start_pos), cache=cache)
     return self.forward_base(x, start_pos, cache=cache)
 
   def __call__(self, tokens: Tensor, start_pos: Variable, cache: Optional[List[Tensor]] = None):

+ 17 - 3
exo/main.py

@@ -1,5 +1,6 @@
 import argparse
 import asyncio
+import atexit
 import signal
 import json
 import logging
@@ -193,6 +194,11 @@ async def run_model_cli(node: Node, inference_engine: InferenceEngine, model_nam
   finally:
     node.on_token.deregister(callback_id)
 
+def clean_path(path):
+    """Clean and resolve path"""
+    if path.startswith("Optional("):
+        path = path.strip('Optional("').rstrip('")')
+    return os.path.expanduser(path)
 
 async def main():
   loop = asyncio.get_running_loop()
@@ -211,13 +217,21 @@ async def main():
     
   if not args.models_seed_dir is None:
     try:
-      await move_models_to_hf(args.models_seed_dir)
+      models_seed_dir = clean_path(args.models_seed_dir)
+      await move_models_to_hf(models_seed_dir)
     except Exception as e:
       print(f"Error moving models to .cache/huggingface: {e}")
 
+  def restore_cursor():
+    if platform.system() != "Windows":
+        os.system("tput cnorm")  # Show cursor
+
+  # Restore the cursor when the program exits
+  atexit.register(restore_cursor)
+
   # Use a more direct approach to handle signals
   def handle_exit():
-    asyncio.ensure_future(shutdown(signal.SIGTERM, loop))
+    asyncio.ensure_future(shutdown(signal.SIGTERM, loop, node.server))
 
   if platform.system() != "Windows":
     for s in [signal.SIGINT, signal.SIGTERM]:
@@ -244,7 +258,7 @@ def run():
   except KeyboardInterrupt:
     print("Received keyboard interrupt. Shutting down...")
   finally:
-    loop.run_until_complete(shutdown(signal.SIGTERM, loop))
+    loop.run_until_complete(shutdown(signal.SIGTERM, loop, node.server))
     loop.close()
 
 
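`clean_path` strips an `Optional("...")` wrapper if present and expands `~`; a couple of hypothetical inputs to illustrate:

```python
# Hypothetical paths, purely to illustrate clean_path() from the hunk above
print(clean_path('Optional("~/models_seed")'))  # -> expanded home path, e.g. /home/user/models_seed
print(clean_path("~/models_seed"))              # -> same result; no wrapper to strip
```
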

+ 3 - 0
exo/topology/device_capabilities.py

@@ -96,6 +96,9 @@ CHIP_FLOPS = {
   "NVIDIA TITAN RTX": DeviceFlops(fp32=16.31*TFLOPS, fp16=32.62*TFLOPS, int8=65.24*TFLOPS),
   # GTX 10 series
   "NVIDIA GEFORCE GTX 1050 TI": DeviceFlops(fp32=2.0*TFLOPS, fp16=4.0*TFLOPS, int8=8.0*TFLOPS),
+  "NVIDIA GEFORCE GTX 1070": DeviceFlops(fp32=6.463*TFLOPS, fp16=0.101*TFLOPS, int8=25.852*TFLOPS),
+  "NVIDIA GEFORCE GTX 1080": DeviceFlops(fp32=8.873*TFLOPS, fp16=0.138*TFLOPS, int8=35.492*TFLOPS),
+  "NVIDIA GEFORCE GTX 1080 TI": DeviceFlops(fp32=11.34*TFLOPS, fp16=0.177*TFLOPS, int8=45.36*TFLOPS),
   # GTX 16 series
   "NVIDIA GeForce GTX 1660 TI": DeviceFlops(fp32=4.8*TFLOPS, fp16=9.6*TFLOPS, int8=19.2*TFLOPS),
   # QUADRO RTX Ampere series

+ 113 - 0
extra/pipsize.py

@@ -0,0 +1,113 @@
+import os
+import importlib.metadata
+import importlib.util
+import json
+import sys
+
+
+def calc_container(path):
+  """Calculate total size of a directory or file."""
+  if os.path.isfile(path):
+    try:
+      return os.path.getsize(path)
+    except (OSError, FileNotFoundError):
+      return 0
+
+  total_size = 0
+  for dirpath, dirnames, filenames in os.walk(path):
+    for f in filenames:
+      fp = os.path.join(dirpath, f)
+      try:
+        total_size += os.path.getsize(fp)
+      except (OSError, FileNotFoundError):
+        continue
+  return total_size
+
+
+def get_package_location(package_name):
+  """Get the actual location of a package's files."""
+  try:
+    spec = importlib.util.find_spec(package_name)
+    if spec is None:
+      return None
+
+    if spec.submodule_search_locations:
+      # Return the first location for namespace packages
+      return spec.submodule_search_locations[0]
+    elif spec.origin:
+      # For single-file modules, return the file path itself
+      return spec.origin
+  except ImportError:
+    return None
+
+
+def get_package_sizes(min_size_mb=0.1):
+  """Get sizes of installed packages above minimum size threshold."""
+  package_sizes = []
+
+  # Get all installed distributions
+  for dist in importlib.metadata.distributions():
+    try:
+      package_name = dist.metadata["Name"]
+      location = get_package_location(package_name.replace("-", "_"))
+
+      if location and os.path.exists(location):
+        size = calc_container(location)
+        size_mb = size / (1024 * 1024)
+
+        if size_mb > min_size_mb:
+          package_sizes.append((package_name, size))
+    except Exception as e:
+      print(
+        f"Error processing {dist.metadata.get('Name', 'Unknown package')}: {e}"
+      )
+
+  return package_sizes
+
+
+def main():
+  # Get and sort package sizes
+  package_sizes = get_package_sizes()
+  package_sizes.sort(key=lambda x: x[1], reverse=True)
+
+  # Convert sizes to MB and prepare data
+  table_data = [(name, size/(1024*1024)) for name, size in package_sizes]
+  total_size = sum(size for _, size in package_sizes)/(1024*1024)
+
+  # Check if --json flag is present
+  if "--json" in sys.argv:
+    try:
+      output_file = sys.argv[sys.argv.index("--json") + 1]
+      json_data = {
+        "packages": [{
+          "name": name,
+          "size_mb": round(size, 2)
+        } for name, size in table_data],
+        "total_size_mb": round(total_size, 2)
+      }
+
+      with open(output_file, 'w') as f:
+        json.dump(json_data, f, indent=2)
+      print(f"JSON data written to {output_file}")
+      return
+    except IndexError:
+      print("Error: Please provide a filename after --json")
+      sys.exit(1)
+    except Exception as e:
+      print(f"Error writing JSON file: {e}")
+      sys.exit(1)
+
+  # Original table output code
+  max_name_width = max(len(name) for name, _ in table_data)
+  max_name_width = max(max_name_width, len("Package"))
+
+  print(f"\n{'Package':<{max_name_width}} | Size (MB)")
+  print("-" * max_name_width + "-+-" + "-" * 10)
+
+  for name, size in table_data:
+    print(f"{name:<{max_name_width}} | {size:>8.2f}")
+
+  print(f"\nTotal size: {total_size:.2f} MB\n")
+
+if __name__ == "__main__":
+  main()
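
Typical invocations of the new script, matching what the `measure_pip_sizes` CI job runs:

```sh
# Human-readable table of installed package sizes
python ./extra/pipsize.py

# Machine-readable output, as used by the measure_pip_sizes job
python ./extra/pipsize.py --json ./pipsize.json
```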

+ 3 - 5
scripts/build_exo.py

@@ -14,8 +14,8 @@ def run():
         "--follow-imports",
         "--standalone",
         "--output-filename=exo",
-        "--onefile",
-        "--python-flag=no_site"
+        "--python-flag=no_site",
+        "--onefile"
     ]
 
     if sys.platform == "darwin": 
@@ -24,8 +24,6 @@ def run():
             "--macos-app-mode=gui",
             "--macos-app-version=0.0.1",
             "--macos-signed-app-name=com.exolabs.exo",
-            "--macos-sign-identity=auto",
-            "--macos-sign-notarization",
             "--include-distribution-meta=mlx",
             "--include-module=mlx._reprlib_fix",
             "--include-module=mlx._os_warning",
@@ -57,4 +55,4 @@ def run():
         print(f"An error occurred: {e}")
 
 if __name__ == "__main__":
-    run()
+    run()

+ 2 - 2
scripts/compile_grpc.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 source ./install.sh
 pushd exo/networking/grpc
-python3.12 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. node_service.proto
-sed -i "s/import node_service_pb2/from . &/" node_service_pb2_grpc.py
+python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. node_service.proto
+sed -i "s/import\ node_service_pb2/from . &/" node_service_pb2_grpc.py
 popd
 

+ 3 - 3
setup.py

@@ -13,11 +13,11 @@ install_requires = [
   "Jinja2==3.1.4",
   "netifaces==0.11.0",
   "numpy==2.0.0",
-  "nuitka==2.4.10",
+  "nuitka==2.5.1",
   "nvidia-ml-py==12.560.30",
   "pillow==10.4.0",
   "prometheus-client==0.20.0",
-  "protobuf==5.27.1",
+  "protobuf==5.28.1",
   "psutil==6.0.0",
   "pydantic==2.9.2",
   "requests==2.32.3",
@@ -26,7 +26,7 @@ install_requires = [
   "tqdm==4.66.4",
   "transformers==4.46.3",
   "uuid==1.30",
-  "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@232edcfd4f8b388807c64fb1817a7668ce27cbad",
+  "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@3b26e51fcebfc6576f4e0f99693e6f1406d61d79",
 ]
 
 extras_require = {