Prechádzať zdrojové kódy

Merge pull request #261 from exo-explore/exo_run_cmd

Exo run cmd
Alex Cheema pred 7 mesiacmi
rodič
commit
17736d7d9e
Zmenil 3 súbory, kde vykonal 24 pridaní a 6 odobraní
  1. README.md (+14 −3)
  2. exo/main.py (+9 −2)
  3. install.sh (+1 −1)

+ 14 - 3
README.md

@@ -91,7 +91,7 @@ The current recommended way to install exo is from source.
 ```sh
 git clone https://github.com/exo-explore/exo.git
 cd exo
-pip install .
+pip install -e .
 # alternatively, with venv
 source install.sh
 ```
@@ -130,13 +130,13 @@ exo starts a ChatGPT-like WebUI (powered by [tinygrad tinychat](https://github.c
 
 For developers, exo also starts a ChatGPT-compatible API endpoint on http://localhost:8000/v1/chat/completions. Examples with curl:
 
-#### Llama 3.1 8B:
+#### Llama 3.2 3B:
 
 ```sh
 curl http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-     "model": "llama-3.1-8b",
+     "model": "llama-3.2-3b",
      "messages": [{"role": "user", "content": "What is the meaning of exo?"}],
      "temperature": 0.7
    }'
@@ -201,6 +201,17 @@ Linux devices will automatically default to using the **tinygrad** inference eng
 
 You can read about tinygrad-specific env vars [here](https://docs.tinygrad.org/env_vars/). For example, you can configure tinygrad to use the cpu by specifying `CLANG=1`.
 
+### Example Usage on a single device with "exo run" command
+
+```sh
+exo run llama-3.2-3b
+```
+
+With a custom prompt:
+
+```sh
+exo run llama-3.2-3b --prompt "What is the meaning of exo?"
+```
 
 ## Debugging
 

+ 9 - 2
exo/main.py

@@ -5,6 +5,7 @@ import json
 import time
 import traceback
 import uuid
+import sys
 from exo.orchestration.standard_node import StandardNode
 from exo.networking.grpc.grpc_server import GRPCServer
 from exo.networking.udp.udp_discovery import UDPDiscovery
@@ -24,6 +25,8 @@ from exo.viz.topology_viz import TopologyViz
 
 # parse args
 parser = argparse.ArgumentParser(description="Initialize GRPC Discovery")
+parser.add_argument("command", nargs="?", choices=["run"], help="Command to run")
+parser.add_argument("model_name", nargs="?", help="Model name to run")
 parser.add_argument("--node-id", type=str, default=None, help="Node ID")
 parser.add_argument("--node-host", type=str, default="0.0.0.0", help="Node host")
 parser.add_argument("--node-port", type=int, default=None, help="Node port")
@@ -179,8 +182,12 @@ async def main():
 
   await node.start(wait_for_peers=args.wait_for_peers)
 
-  if args.run_model:
-    await run_model_cli(node, inference_engine, args.run_model, args.prompt)
+  if args.command == "run" or args.run_model:
+    model_name = args.model_name or args.run_model
+    if not model_name:
+      print("Error: Model name is required when using 'run' command or --run-model")
+      return
+    await run_model_cli(node, inference_engine, model_name, args.prompt)
   else:
     asyncio.create_task(api.run(port=args.chatgpt_api_port))  # Start the API server as a non-blocking task
     await asyncio.Event().wait()

+ 1 - 1
install.sh

@@ -2,4 +2,4 @@
 
 python3 -m venv .venv
 source .venv/bin/activate
-pip install .
+pip install -e .