Prechádzať zdrojové kódy

Merge pull request #261 from exo-explore/exo_run_cmd

Exo run cmd
Alex Cheema pred 7 mesiacmi
rodič
commit
17736d7d9e
Zmenil 3 súbory, kde vykonal 24 pridaní a 6 odobraní
  1. README.md (+14 −3)
  2. exo/main.py (+9 −2)
  3. install.sh (+1 −1)

+ 14 - 3
README.md

@@ -91,7 +91,7 @@ The current recommended way to install exo is from source.
 ```sh
 git clone https://github.com/exo-explore/exo.git
 cd exo
-pip install .
+pip install -e .
 # alternatively, with venv
 source install.sh
 ```
@@ -130,13 +130,13 @@ exo starts a ChatGPT-like WebUI (powered by [tinygrad tinychat](https://github.c
 
 For developers, exo also starts a ChatGPT-compatible API endpoint on http://localhost:8000/v1/chat/completions. Examples with curl:
 
-#### Llama 3.1 8B:
+#### Llama 3.2 3B:
 
 ```sh
 curl http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-     "model": "llama-3.1-8b",
+     "model": "llama-3.2-3b",
      "messages": [{"role": "user", "content": "What is the meaning of exo?"}],
      "temperature": 0.7
    }'
@@ -201,6 +201,17 @@ Linux devices will automatically default to using the **tinygrad** inference eng
 
 You can read about tinygrad-specific env vars [here](https://docs.tinygrad.org/env_vars/). For example, you can configure tinygrad to use the cpu by specifying `CLANG=1`.
 
+### Example Usage on a single device with "exo run" command
+
+```sh
+exo run llama-3.2-3b
+```
+
+With a custom prompt:
+
+```sh
+exo run llama-3.2-3b --prompt "What is the meaning of exo?"
+```
 
 ## Debugging
 

+ 9 - 2
exo/main.py

@@ -5,6 +5,7 @@ import json
 import time
 import traceback
 import uuid
+import sys
 from exo.orchestration.standard_node import StandardNode
 from exo.networking.grpc.grpc_server import GRPCServer
 from exo.networking.udp.udp_discovery import UDPDiscovery
@@ -24,6 +25,8 @@ from exo.viz.topology_viz import TopologyViz
 
 # parse args
 parser = argparse.ArgumentParser(description="Initialize GRPC Discovery")
+parser.add_argument("command", nargs="?", choices=["run"], help="Command to run")
+parser.add_argument("model_name", nargs="?", help="Model name to run")
 parser.add_argument("--node-id", type=str, default=None, help="Node ID")
 parser.add_argument("--node-host", type=str, default="0.0.0.0", help="Node host")
 parser.add_argument("--node-port", type=int, default=None, help="Node port")
@@ -179,8 +182,12 @@ async def main():
 
   await node.start(wait_for_peers=args.wait_for_peers)
 
-  if args.run_model:
-    await run_model_cli(node, inference_engine, args.run_model, args.prompt)
+  if args.command == "run" or args.run_model:
+    model_name = args.model_name or args.run_model
+    if not model_name:
+      print("Error: Model name is required when using 'run' command or --run-model")
+      return
+    await run_model_cli(node, inference_engine, model_name, args.prompt)
   else:
     asyncio.create_task(api.run(port=args.chatgpt_api_port))  # Start the API server as a non-blocking task
     await asyncio.Event().wait()

+ 1 - 1
install.sh

@@ -2,4 +2,4 @@
 
 python3 -m venv .venv
 source .venv/bin/activate
-pip install .
+pip install -e .