@@ -0,0 +1,127 @@
+import aiohttp
+import asyncio
+import time
+import json
+import os
+from typing import Dict, Any
+
+
+async def measure_performance(api_endpoint: str, prompt: str) -> Dict[str, Any]:
+    """
+    Measures the performance of an API endpoint by sending a prompt and recording metrics.
+
+    Args:
+        api_endpoint (str): The API endpoint URL.
+        prompt (str): The prompt to send to the API.
+
+    Returns:
+        Dict[str, Any]: A dictionary containing performance metrics or error information.
+    """
+    results: Dict[str, Any] = {}
+    request_payload = {
+        "model": "llama-3.2-3b",
+        "messages": [{"role": "user", "content": prompt}],
+        "temperature": 0,
+        "stream": True
+    }
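+    # Streaming is enabled so that time-to-first-token can be measured
+    # from the first content chunk of the server-sent event stream.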
+
+    async with aiohttp.ClientSession() as session:
+        try:
+            start_time = time.perf_counter()
+            first_token_time = None
+            total_tokens = 0
+
+            async with session.post(api_endpoint, json=request_payload) as response:
+                if response.status != 200:
+                    results["error"] = f"HTTP {response.status}: {response.reason}"
+                    return results
+
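+                # Each server-sent event arrives as a raw bytes line of
+                # the form b"data: {...}".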
+                async for raw_line in response.content:
+                    line = raw_line.decode('utf-8').strip()
+                    if not line or not line.startswith('data: '):
+                        continue
+
+                    line_content = line[6:]  # Remove 'data: ' prefix
+                    if line_content == '[DONE]':
+                        break
+
+                    try:
+                        chunk = json.loads(line_content)
+                        choice = chunk.get('choices', [{}])[0]
+                        content = choice.get('delta', {}).get('content')
+
+                        if content:
+                            if first_token_time is None:
+                                first_token_time = time.perf_counter()
+                                results["time_to_first_token"] = first_token_time - start_time
+
+                            total_tokens += 1
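+                            # NOTE: counts one token per content-bearing
+                            # chunk; servers may emit more than one token
+                            # per chunk, so this is an approximation.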
+                    except json.JSONDecodeError:
+                        # Skip malformed chunks rather than aborting the run
+                        continue
+
+            end_time = time.perf_counter()
+            total_time = end_time - start_time
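+
+            # NOTE: tokens_per_second is computed over the whole request,
+            # including time-to-first-token, not just the decode phase.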
+            if total_tokens > 0:
+                results.update({
+                    "tokens_per_second": total_tokens / total_time,
+                    "total_tokens": total_tokens,
+                    "total_time": total_time
+                })
+            else:
+                results["error"] = "No tokens were generated"
+
+        except aiohttp.ClientError as e:
+            results["error"] = f"Client error: {e}"
+        except Exception as e:
+            results["error"] = f"Unexpected error: {e}"
+
+        return results
+
+
+async def main() -> None:
+    api_endpoint = "http://localhost:52415/v1/chat/completions"
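+    # Assumes an OpenAI-compatible chat completions server is already
+    # listening locally on this port.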
+
+    # Define prompts
+    prompt_basic = "this is a ping"
+    prompt_essay = "write an essay about cats"
+
|
|
|
+ # Measure performance for the basic prompt
|
|
|
+ print("Measuring performance for the basic prompt...")
|
|
|
+ results_basic = await measure_performance(api_endpoint, prompt_basic)
|
|
|
+ print("Basic prompt performance metrics:")
|
|
|
+ print(json.dumps(results_basic, indent=4))
|
|
|
+
|
|
|
+ # Measure performance for the essay prompt, which depends on the first measurement
|
|
|
+ print("\nMeasuring performance for the essay prompt...")
|
|
|
+ results = await measure_performance(api_endpoint, prompt_essay)
|
|
|
+
|
|
|
+ # Save metrics from the "universe and everything" prompt
|
|
|
+ metrics_file = os.path.join("artifacts", "benchmark.json")
|
|
|
+ os.makedirs(os.path.dirname(metrics_file), exist_ok=True)
|
|
|
+ try:
|
|
|
+ with open(metrics_file, "w", encoding="utf-8") as f:
|
|
|
+ json.dump(results, f, indent=4)
|
|
|
+ print(f"Performance metrics saved to {metrics_file}")
|
|
|
+ except IOError as e:
|
|
|
+ print(f"Failed to save metrics: {e}")
|
|
|
+
|
|
|
+ # Optionally print the metrics for visibility
|
|
|
+ print("Performance metrics:")
|
|
|
+ print(json.dumps(results, indent=4))
|
|
|
+
|
|
|
+
+if __name__ == "__main__":
+    asyncio.run(main())