Browse Source

Merge remote-tracking branch 'upstream/main'

DevEmilio96 7 months ago
parent
commit
425bd214eb
39 changed files with 51 additions and 45 deletions
  1. +4 -4    .circleci/config.yml
  2. +26 -23  README.md
  3. +1 -1    exo/api/chatgpt_api.py
  4. +1 -1    exo/helpers.py
  5. +0 -0    exo/inference/tinygrad/__init__.py
  6. +2 -5    exo/inference/tinygrad/inference.py
  7. +0 -0    exo/inference/tinygrad/models/__init__.py
  8. +13 -3   exo/main.py
  9. +0 -0    exo/networking/tailscale/__init__.py
 10. +0 -0    exo/networking/udp/__init__.py
 11. +0 -0    exo/tinychat/common.css
 12. +0 -0    exo/tinychat/favicon.svg
 13. +0 -0    exo/tinychat/index.css
 14. +1 -1    exo/tinychat/index.html
 15. +0 -0    exo/tinychat/index.js
 16. +0 -0    exo/tinychat/static/cdn.jsdelivr.net/npm/@alpine-collective/toolkit@1.0.2/dist/cdn.min.js
 17. +0 -0    exo/tinychat/static/cdn.jsdelivr.net/npm/@alpinejs/focus@3.x.x/dist/cdn.min.js
 18. +0 -0    exo/tinychat/static/cdn.jsdelivr.net/npm/@alpinejs/intersect@3.x.x/dist/cdn.min.js
 19. +0 -0    exo/tinychat/static/cdn.jsdelivr.net/npm/purecss@3.0.0/build/base-min.css
 20. +0 -0    exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css
 21. +0 -0    exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-brands-400.ttf
 22. +0 -0    exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-brands-400.woff2
 23. +0 -0    exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-regular-400.ttf
 24. +0 -0    exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-regular-400.woff2
 25. +0 -0    exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-solid-900.ttf
 26. +0 -0    exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-solid-900.woff2
 27. +0 -0    exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-v4compatibility.ttf
 28. +0 -0    exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-v4compatibility.woff2
 29. +0 -0    exo/tinychat/static/fonts.googleapis.com/css2
 30. +0 -0    exo/tinychat/static/unpkg.com/@highlightjs/cdn-assets@11.9.0/highlight.min.js
 31. +0 -0    exo/tinychat/static/unpkg.com/@highlightjs/cdn-assets@11.9.0/styles/vs2015.min.css
 32. +0 -0    exo/tinychat/static/unpkg.com/@marcreichel/alpine-autosize@1.3.x/dist/alpine-autosize.min.js
 33. +0 -0    exo/tinychat/static/unpkg.com/alpinejs@3.x.x/dist/cdn.min.js
 34. +0 -0    exo/tinychat/static/unpkg.com/dompurify@3.1.5/dist/purify.min.js
 35. +0 -0    exo/tinychat/static/unpkg.com/marked-highlight@2.1.2/lib/index.umd.js
 36. +0 -0    exo/tinychat/static/unpkg.com/marked@13.0.0/marked.min.js
 37. +0 -0    exo/tinychat/update_deps.py
 38. +1 -1    install.sh
 39. +2 -6    setup.py

+ 4 - 4
.circleci/config.yml

@@ -17,11 +17,11 @@ commands:
             source env/bin/activate
 
             # Start first instance
-            HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 python3 main.py --inference-engine <<parameters.inference_engine>> --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout 900 2>&1 | tee output1.log &
+            HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout 900 2>&1 | tee output1.log &
             PID1=$!
 
             # Start second instance
-            HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 python3 main.py --inference-engine <<parameters.inference_engine>> --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout 900 2>&1 | tee output2.log &
+            HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout 900 2>&1 | tee output2.log &
             PID2=$!
 
             # Wait for discovery
@@ -138,9 +138,9 @@ jobs:
           name: Run discovery integration test
           command: |
             source env/bin/activate
-            DEBUG_DISCOVERY=7 DEBUG=7 python3 main.py --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
+            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
             PID1=$!
-            DEBUG_DISCOVERY=7 DEBUG=7 python3 main.py --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
+            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
             PID2=$!
             sleep 10
             kill $PID1 $PID2

+ 26 - 23
README.md

@@ -79,19 +79,11 @@ The current recommended way to install exo is from source.
 
 ### Hardware Requirements
 
-| Component          | MLX Requirements                                              | TinyGrad Requirements (for Llama-3.1-8B or similar)                    |
-|--------------------|---------------------------------------------------------------|------------------------------------------------------------------------|
-| **CPU**            | Apple Silicon (M1, M2, or later) only                         | Minimum: Intel Core i7-12700 or AMD Ryzen 7 5800X <br>Recommended: Intel Core i9-12900K or AMD Ryzen 9 5900X |
-| **GPU**            | Apple Silicon Integrated GPU                                  | Minimum: NVIDIA RTX 4070 (12 GB VRAM) <br>Recommended: NVIDIA RTX 4080 (16 GB VRAM)  |
-| **RAM**            | Minimum: 16 GB <br>Recommended: 32 GB                         | Minimum: 32 GB <br>Recommended: 64 GB                                  |
-| **Storage**        | Minimum: 256 GB SSD <br>Recommended: 512 GB SSD               | Minimum: 512 GB SSD <br>Recommended: 1 TB SSD                          |
-| **Operating System**| macOS (Big Sur)                               | Ubuntu                                              |
-
-**Note**:  
-- For **MLX**, you can currently run **smaller models** such as **Llama-3.2-1B**, which are optimized for Apple Silicon hardware.
-- For **TinyGrad**, the **smallest model** currently supported is **Llama-3.1-8B**, which requires more robust hardware to run effectively.
-- **Hardware requirements are indicative**: The overall load is distributed across the **CPU, RAM**, and **GPU/VRAM**, not solely on the GPU. Therefore, your system's performance depends on its ability to handle this distribution effectively.
-- It is also **possible to run models in a cluster mode**, utilizing multiple devices to distribute the computation load across multiple machines or GPUs, enhancing performance.
+- The only requirement to run exo is to have enough memory across all your devices to fit the entire model into memory. For example, if you are running llama 3.1 8B (fp16), you need 16GB of memory across all devices. Any of the following configurations would work since they each have more than 16GB of memory in total:
+  - 2 x 8GB M3 MacBook Airs
+  - 1 x 16GB NVIDIA RTX 4070 Ti Laptop
+  - 2 x Raspberry Pi 400 with 4GB of RAM each (running on CPU) + 1 x 8GB Mac Mini
+- exo is designed to run on devices with heterogeneous capabilities. For example, you can have some devices with powerful GPUs and others with integrated GPUs or even CPUs. Adding less capable devices will slow down individual inference latency but will increase the overall throughput of the cluster.
 
 ### From source
 
@@ -99,7 +91,7 @@ The current recommended way to install exo is from source.
 ```sh
 git clone https://github.com/exo-explore/exo.git
 cd exo
-pip install .
+pip install -e .
 # alternatively, with venv
 source install.sh
 ```
@@ -124,12 +116,12 @@ source install.sh
 #### Device 1:
 
 ```sh
-python3 main.py
+exo
 ```
 
 #### Device 2:
 ```sh
-python3 main.py
+exo
 ```
 
 That's it! No configuration required - exo will automatically discover the other device(s).
@@ -138,13 +130,13 @@ exo starts a ChatGPT-like WebUI (powered by [tinygrad tinychat](https://github.c
 
 For developers, exo also starts a ChatGPT-compatible API endpoint on http://localhost:8000/v1/chat/completions. Examples with curl:
 
-#### Llama 3.1 8B:
+#### Llama 3.2 3B:
 
 ```sh
 curl http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-     "model": "llama-3.1-8b",
+     "model": "llama-3.2-3b",
      "messages": [{"role": "user", "content": "What is the meaning of exo?"}],
      "temperature": 0.7
    }'
@@ -195,38 +187,49 @@ curl http://localhost:8000/v1/chat/completions \
 #### Device 1 (MacOS):
 
 ```sh
-python3 main.py --inference-engine tinygrad
+exo --inference-engine tinygrad
 ```
 
 Here we explicitly tell exo to use the **tinygrad** inference engine.
 
 #### Device 2 (Linux):
 ```sh
-python3 main.py
+exo
 ```
 
 Linux devices will automatically default to using the **tinygrad** inference engine.
 
 You can read about tinygrad-specific env vars [here](https://docs.tinygrad.org/env_vars/). For example, you can configure tinygrad to use the cpu by specifying `CLANG=1`.
 
+### Example Usage on a single device with "exo run" command
+
+```sh
+exo run llama-3.2-3b
+```
+
+With a custom prompt:
+
+```sh
+exo run llama-3.2-3b --prompt "What is the meaning of exo?"
+```
 
 ## Debugging
 
 Enable debug logs with the DEBUG environment variable (0-9).
 
 ```sh
-DEBUG=9 python3 main.py
+DEBUG=9 exo
 ```
 
 For the **tinygrad** inference engine specifically, there is a separate DEBUG flag `TINYGRAD_DEBUG` that can be used to enable debug logs (1-6).
 
 ```sh
-TINYGRAD_DEBUG=2 python3 main.py
+TINYGRAD_DEBUG=2 exo
 ```
 
 ## Known Issues
 
-- On some versions of MacOS/Python, certificates are not installed properly which can lead to SSL errors (e.g. SSL error with huggingface.co). To fix this, run the Install Certificates command, usually: 
+- On some versions of MacOS/Python, certificates are not installed properly which can lead to SSL errors (e.g. SSL error with huggingface.co). To fix this, run the Install Certificates command, usually:
 
 ```sh
/Applications/Python 3.x/Install Certificates.command
```
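
The memory rule added to the README's Hardware Requirements section (enough combined memory across devices to fit the model) is simple arithmetic: parameter count times bytes per parameter. A minimal sketch of that check, with illustrative helper names:

```python
# fp16 stores 2 bytes per parameter, so llama 3.1 8B needs ~8e9 * 2 B = 16 GB.
def model_memory_gb(params_billion: float, bytes_per_param: int = 2) -> float:
    return params_billion * bytes_per_param

def cluster_fits(device_memory_gb: list[float], params_billion: float) -> bool:
    # exo only needs the *total* memory across all devices to cover the model.
    return sum(device_memory_gb) >= model_memory_gb(params_billion)

print(cluster_fits([8, 8], 8))     # two 8GB M3 MacBook Airs -> True
print(cluster_fits([4, 4, 8], 8))  # two 4GB Pi 400s + 8GB Mac Mini -> True
```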

+ 1 - 1
exo/api/chatgpt_api.py

@@ -179,7 +179,7 @@ class ChatGPTAPI:
     # Endpoint for download progress tracking
     cors.add(self.app.router.add_get("/v1/download/progress", self.handle_get_download_progress), {"*": cors_options})
 
-    self.static_dir = Path(__file__).parent.parent.parent/"tinychat/examples/tinychat"
+    self.static_dir = Path(__file__).parent.parent/"tinychat"
     self.app.router.add_get("/", self.handle_root)
     self.app.router.add_static("/", self.static_dir, name="static")
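
The relocated static_dir resolves the tinychat assets relative to the installed exo package itself, rather than a sibling checkout, so they travel with the wheel (see the package_data entry added in setup.py below). A minimal sketch of the path arithmetic, with an illustrative install prefix:

```python
from pathlib import Path

# If chatgpt_api.py is installed at .../site-packages/exo/api/chatgpt_api.py,
# two .parent hops climb to the exo package root, where tinychat/ now lives.
module_file = Path("/site-packages/exo/api/chatgpt_api.py")  # illustrative path
static_dir = module_file.parent.parent / "tinychat"
print(static_dir)  # /site-packages/exo/tinychat
```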
 

+ 1 - 1
exo/helpers.py

@@ -170,7 +170,7 @@ def is_valid_uuid(val):
 
 
 def get_or_create_node_id():
-  NODE_ID_FILE = Path(os.path.dirname(os.path.abspath(__file__)))/".exo_node_id"
+  NODE_ID_FILE = Path(tempfile.gettempdir()) / ".exo_node_id"
   try:
     if NODE_ID_FILE.is_file():
       with open(NODE_ID_FILE, "r") as f:
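
The node ID file moves from the installed package directory to the system temp directory, which stays writable even when exo sits in a read-only site-packages tree. A sketch of the full read-or-create pattern; the body beyond the visible hunk is reconstructed as an assumption and may differ from the actual function:

```python
import tempfile
import uuid
from pathlib import Path

def get_or_create_node_id() -> str:
    # tempfile.gettempdir() is writable on every platform, unlike the old
    # location inside the installed package tree.
    node_id_file = Path(tempfile.gettempdir()) / ".exo_node_id"
    if node_id_file.is_file():
        return node_id_file.read_text().strip()
    node_id = str(uuid.uuid4())
    node_id_file.write_text(node_id)
    return node_id
```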

+ 0 - 0
exo/inference/tinygrad/__init__.py


+ 2 - 5
exo/inference/tinygrad/inference.py

@@ -4,9 +4,8 @@ import os
 from exo.inference.tinygrad.models.llama import Transformer, convert_from_huggingface, fix_bf16
 from exo.inference.shard import Shard
 from exo.inference.tokenizers import resolve_tokenizer
-from tinygrad.nn.state import safe_load, torch_load, load_state_dict
-from tinygrad import Tensor, dtypes, nn, Context
-from transformers import AutoTokenizer
+from tinygrad.nn.state import load_state_dict
+from tinygrad import Tensor, nn, Context
 from exo.inference.inference_engine import InferenceEngine
 from typing import Optional, Tuple
 import numpy as np
@@ -14,8 +13,6 @@ from exo.inference.tinygrad.tinygrad_helpers import concat_weights, load
 from exo.download.shard_download import ShardDownloader
 from concurrent.futures import ThreadPoolExecutor
 import asyncio
-import threading
-from functools import partial
 
 Tensor.no_grad = True
 # default settings

+ 0 - 0
exo/inference/tinygrad/models/__init__.py


+ 13 - 3
main.py → exo/main.py

@@ -5,6 +5,7 @@ import json
 import time
 import traceback
 import uuid
+import sys
 from exo.orchestration.standard_node import StandardNode
 from exo.networking.grpc.grpc_server import GRPCServer
 from exo.networking.udp.udp_discovery import UDPDiscovery
@@ -24,6 +25,8 @@ from exo.viz.topology_viz import TopologyViz
 
 # parse args
 parser = argparse.ArgumentParser(description="Initialize GRPC Discovery")
+parser.add_argument("command", nargs="?", choices=["run"], help="Command to run")
+parser.add_argument("model_name", nargs="?", help="Model name to run")
 parser.add_argument("--node-id", type=str, default=None, help="Node ID")
 parser.add_argument("--node-host", type=str, default="0.0.0.0", help="Node host")
 parser.add_argument("--node-port", type=int, default=None, help="Node port")
@@ -186,14 +189,18 @@ async def main():
 
   await node.start(wait_for_peers=args.wait_for_peers)
 
-  if args.run_model:
-    await run_model_cli(node, inference_engine, args.run_model, args.prompt)
+  if args.command == "run" or args.run_model:
+    model_name = args.model_name or args.run_model
+    if not model_name:
+      print("Error: Model name is required when using 'run' command or --run-model")
+      return
+    await run_model_cli(node, inference_engine, model_name, args.prompt)
   else:
     asyncio.create_task(api.run(port=args.chatgpt_api_port))  # Start the API server as a non-blocking task
     await asyncio.Event().wait()
 
 
-if __name__ == "__main__":
+def run():
   loop = asyncio.new_event_loop()
   asyncio.set_event_loop(loop)
   try:
@@ -203,3 +210,6 @@ if __name__ == "__main__":
   finally:
     loop.run_until_complete(shutdown(signal.SIGTERM, loop))
     loop.close()
+
+if __name__ == "__main__":
+  run()
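
Both new positional arguments use nargs="?", which is what lets one parser serve the bare `exo`, the new `exo run <model>`, and the legacy --run-model flag. A quick self-contained check of the parsing behavior:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("command", nargs="?", choices=["run"], help="Command to run")
parser.add_argument("model_name", nargs="?", help="Model name to run")

# "exo run llama-3.2-3b" style invocation:
args = parser.parse_args(["run", "llama-3.2-3b"])
assert args.command == "run" and args.model_name == "llama-3.2-3b"

# Bare "exo": both positionals fall back to None and the API server starts.
args = parser.parse_args([])
assert args.command is None and args.model_name is None
```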

+ 0 - 0
exo/networking/tailscale/__init__.py


+ 0 - 0
exo/networking/udp/__init__.py


+ 0 - 0
tinychat/examples/tinychat/common.css → exo/tinychat/common.css


+ 0 - 0
tinychat/examples/tinychat/favicon.svg → exo/tinychat/favicon.svg


+ 0 - 0
tinychat/examples/tinychat/index.css → exo/tinychat/index.css


+ 1 - 1
tinychat/examples/tinychat/index.html → exo/tinychat/index.html

@@ -22,7 +22,7 @@
 <link href="/static/unpkg.com/@highlightjs/cdn-assets@11.9.0/styles/vs2015.min.css" rel="stylesheet"/>
 <link href="/index.css" rel="stylesheet"/>
 <link href="/common.css" rel="stylesheet"/>
-</link></head>
+</head>
 <body>
 <main x-data="state" x-init="console.log(endpoint)">
      <!-- Error Toast -->

+ 0 - 0
tinychat/examples/tinychat/index.js → exo/tinychat/index.js


+ 0 - 0
tinychat/examples/tinychat/static/cdn.jsdelivr.net/npm/@alpine-collective/toolkit@1.0.2/dist/cdn.min.js → exo/tinychat/static/cdn.jsdelivr.net/npm/@alpine-collective/toolkit@1.0.2/dist/cdn.min.js


+ 0 - 0
tinychat/examples/tinychat/static/cdn.jsdelivr.net/npm/@alpinejs/focus@3.x.x/dist/cdn.min.js → exo/tinychat/static/cdn.jsdelivr.net/npm/@alpinejs/focus@3.x.x/dist/cdn.min.js


+ 0 - 0
tinychat/examples/tinychat/static/cdn.jsdelivr.net/npm/@alpinejs/intersect@3.x.x/dist/cdn.min.js → exo/tinychat/static/cdn.jsdelivr.net/npm/@alpinejs/intersect@3.x.x/dist/cdn.min.js


+ 0 - 0
tinychat/examples/tinychat/static/cdn.jsdelivr.net/npm/purecss@3.0.0/build/base-min.css → exo/tinychat/static/cdn.jsdelivr.net/npm/purecss@3.0.0/build/base-min.css


+ 0 - 0
tinychat/examples/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css → exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css


+ 0 - 0
tinychat/examples/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-brands-400.ttf → exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-brands-400.ttf


+ 0 - 0
tinychat/examples/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-brands-400.woff2 → exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-brands-400.woff2


+ 0 - 0
tinychat/examples/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-regular-400.ttf → exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-regular-400.ttf


+ 0 - 0
tinychat/examples/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-regular-400.woff2 → exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-regular-400.woff2


+ 0 - 0
tinychat/examples/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-solid-900.ttf → exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-solid-900.ttf


+ 0 - 0
tinychat/examples/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-solid-900.woff2 → exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-solid-900.woff2


+ 0 - 0
tinychat/examples/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-v4compatibility.ttf → exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-v4compatibility.ttf


+ 0 - 0
tinychat/examples/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-v4compatibility.woff2 → exo/tinychat/static/cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/fa-v4compatibility.woff2


+ 0 - 0
tinychat/examples/tinychat/static/fonts.googleapis.com/css2 → exo/tinychat/static/fonts.googleapis.com/css2


+ 0 - 0
tinychat/examples/tinychat/static/unpkg.com/@highlightjs/cdn-assets@11.9.0/highlight.min.js → exo/tinychat/static/unpkg.com/@highlightjs/cdn-assets@11.9.0/highlight.min.js


+ 0 - 0
tinychat/examples/tinychat/static/unpkg.com/@highlightjs/cdn-assets@11.9.0/styles/vs2015.min.css → exo/tinychat/static/unpkg.com/@highlightjs/cdn-assets@11.9.0/styles/vs2015.min.css


+ 0 - 0
tinychat/examples/tinychat/static/unpkg.com/@marcreichel/alpine-autosize@1.3.x/dist/alpine-autosize.min.js → exo/tinychat/static/unpkg.com/@marcreichel/alpine-autosize@1.3.x/dist/alpine-autosize.min.js


+ 0 - 0
tinychat/examples/tinychat/static/unpkg.com/alpinejs@3.x.x/dist/cdn.min.js → exo/tinychat/static/unpkg.com/alpinejs@3.x.x/dist/cdn.min.js


+ 0 - 0
tinychat/examples/tinychat/static/unpkg.com/dompurify@3.1.5/dist/purify.min.js → exo/tinychat/static/unpkg.com/dompurify@3.1.5/dist/purify.min.js


+ 0 - 0
tinychat/examples/tinychat/static/unpkg.com/marked-highlight@2.1.2/lib/index.umd.js → exo/tinychat/static/unpkg.com/marked-highlight@2.1.2/lib/index.umd.js


+ 0 - 0
tinychat/examples/tinychat/static/unpkg.com/marked@13.0.0/marked.min.js → exo/tinychat/static/unpkg.com/marked@13.0.0/marked.min.js


+ 0 - 0
tinychat/examples/tinychat/update_deps.py → exo/tinychat/update_deps.py


+ 1 - 1
install.sh

@@ -2,4 +2,4 @@
 
 python3 -m venv .venv
 source .venv/bin/activate
-pip install .
+pip install -e .

+ 2 - 6
setup.py

@@ -7,12 +7,8 @@ install_requires = [
   "aiohttp==3.10.2",
   "aiohttp_cors==0.7.0",
   "aiofiles==24.1.0",
-  "blobfile==2.1.1",
   "grpcio==1.64.1",
   "grpcio-tools==1.64.1",
-  "hf-transfer==0.1.8",
-  "huggingface-hub==0.24.5",
-  "Jinja2==3.1.4",
   "netifaces==0.11.0",
   "numpy==2.0.0",
   "pillow==10.4.0",
@@ -25,8 +21,6 @@ install_requires = [
   "safetensors==0.4.3",
   "tailscale==0.6.1",
   "tenacity==9.0.0",
-  "tiktoken==0.7.0",
-  "tokenizers==0.19.1",
   "tqdm==4.66.4",
   "transformers==4.43.3",
   "uuid==1.30",
@@ -55,4 +49,6 @@ setup(
   packages=find_packages(),
   install_requires=install_requires,
   extras_require=extras_require,
+  package_data={"exo": ["tinychat/**/*"]},
+  entry_points={"console_scripts": ["exo = exo.main:run"]},
 )
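
The console_scripts entry point is what backs the bare `exo` command used throughout the updated README and CircleCI config: on install, pip generates a small launcher that imports exo.main and calls run(), while package_data bundles the tinychat assets alongside the code. Roughly what that generated launcher does (a simplified sketch, not the literal file pip writes):

```python
# Approximation of the launcher generated for "exo = exo.main:run"
import sys
from exo.main import run

if __name__ == "__main__":
    sys.exit(run())
```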