@@ -13,285 +13,9 @@ env:
   PYTHONPATH: "."
 
 jobs:
-  check_line_count:
-    runs-on: ubuntu-latest
+  check_local_runner:
+    runs-on: [self-hosted, macOS]
     steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install tabulate
-
-      - name: Run line count check
-        run: |
-          if [[ -n "${{ github.event.pull_request }}" ]]; then
-            git fetch origin ${{ github.base_ref }}
-            git clone -b ${{ github.base_ref }} --single-branch \
-              https://github.com/${{ github.repository }}.git base_branch
-            python extra/line_counter.py base_branch .
-          else
-            python extra/line_counter.py .
-          fi
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: line-count-results
-          path: |
-            line-count-snapshot.json
-            line-count-diff.json
-
-  unit_test:
-    runs-on: macos-15
-    timeout-minutes: 20
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
-      # - name: Cache python packages
-      #   uses: actions/cache@v4
-      #   with:
-      #     path: ${{ env.Python3_ROOT_DIR }}/lib/python3.12/site-packages
-      #     key: testing-packages-${{ hashFiles('**/setup.py') }}
-
-      - name: Install dependencies
-        run: |
-          python -m venv env
-          source env/bin/activate
-          pip install --upgrade pip
-          pip install llvmlite
-          pip install .
-
-      - name: Basic import test
+      - name: Test
         run: |
-          source env/bin/activate
-          python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
-
-      - name: Run tests
-        run: |
-          source env/bin/activate
-          METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=1 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine
-          python3 ./test/test_tokenizers.py
-          python3 ./test/test_model_helpers.py
-
-  discovery_integration_test:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
-      - name: Install dependencies
-        run: |
-          python -m venv env
-          source env/bin/activate
-          pip install --upgrade pip
-          pip install .
-
-      - name: Run discovery integration test
-        run: |
-          source env/bin/activate
-          DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 &
-          PID1=$!
-          DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 &
-          PID2=$!
-          sleep 10
-          kill $PID1 $PID2
-          if grep -q "Peer statuses: {.*'node2': 'is_connected=True, health_check=True" output1.log && \
-             ! grep -q "Failed to connect peers:" output1.log && \
-             grep -q "Peer statuses: {.*'node1': 'is_connected=True, health_check=True" output2.log && \
-             ! grep -q "Failed to connect peers:" output2.log; then
-            echo "Test passed: Both instances discovered each other"
-            exit 0
-          else
-            echo "Test failed: Devices did not discover each other"
-            echo "Output of first instance:"
-            cat output1.log
-            echo "Output of second instance:"
-            cat output2.log
-            exit 1
-          fi
-
-  chatgpt_api_tests:
-    runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'ubuntu-latest' || 'macos-15' }}
-    strategy:
-      matrix:
-        # inference_engine: [mlx, tinygrad, dummy]
-        inference_engine: [tinygrad, dummy]
-        include:
-          # - inference_engine: mlx
-          #   model_id: llama-3.2-1b
-          #   prompt: "Keep responses concise. Who was the king of pop?"
-          #   expected_output: "Michael Jackson"
-          - inference_engine: tinygrad
-            model_id: llama-3.2-1b
-            prompt: "Keep responses concise. Who was the king of pop?"
-            expected_output: "Michael Jackson"
-          - inference_engine: dummy
-            model_id: dummy
-            prompt: "Dummy prompt."
-            expected_output: "dummy"
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
-      - name: Install dependencies
-        run: |
-          python -m venv env
-          source env/bin/activate
-          pip install --upgrade pip
-          pip install .
-          if [ "${{ matrix.inference_engine }}" = "tinygrad" ]; then
-            pip install llvmlite
-          fi
-
-      - name: Run ChatGPT API test
-        env:
-          TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }}
-          SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '0' }}
-          CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '0' }}
-        run: |
-          source env/bin/activate
-
-          # Start first instance
-          HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine ${{ matrix.inference_engine }} \
-            --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 \
-            --chatgpt-api-response-timeout 900 --disable-tui > output1.log &
-          PID1=$!
-          tail -f output1.log &
-          TAIL1=$!
-
-          # Start second instance
-          HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine ${{ matrix.inference_engine }} \
-            --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 \
-            --chatgpt-api-response-timeout 900 --disable-tui > output2.log &
-          PID2=$!
-          tail -f output2.log &
-          TAIL2=$!
-
-          # Remember to kill the tail processes at the end
-          trap 'kill $TAIL1 $TAIL2' EXIT
-
-          # Wait for discovery and verify peer connections
-          sleep 10
-          if ! grep -q "Peer statuses: {.*'node2': 'is_connected=True, health_check=True" output1.log || \
-             grep -q "Failed to connect peers:" output1.log || \
-             ! grep -q "Peer statuses: {.*'node1': 'is_connected=True, health_check=True" output2.log || \
-             grep -q "Failed to connect peers:" output2.log; then
-            echo "Test failed: Nodes did not discover each other properly"
-            echo "Output of first instance:"
-            cat output1.log
-            echo "Output of second instance:"
-            cat output2.log
-            exit 1
-          fi
-          echo "Peer discovery successful"
-
-          # Function to check if processes are still running
-          check_processes() {
-            if ! kill -0 $PID1 2>/dev/null; then
-              echo "First instance (PID $PID1) died unexpectedly. Log output:"
-              cat output1.log
-              exit 1
-            fi
-            if ! kill -0 $PID2 2>/dev/null; then
-              echo "Second instance (PID $PID2) died unexpectedly. Log output:"
-              cat output2.log
-              exit 1
-            fi
-          }
-
-          # Check processes before proceeding
-          check_processes
-
-          echo "Sending request to first instance..."
-          response_1=$(curl -s http://localhost:8000/v1/chat/completions \
-            -H "Content-Type: application/json" \
-            -d '{
-              "model": "${{ matrix.model_id }}",
-              "messages": [{"role": "user", "content": "${{ matrix.prompt }}"}],
-              "temperature": 0.7
-            }')
-          echo "Response 1: $response_1"
-
-          # Check processes after first response
-          check_processes
-
-          echo "Sending request to second instance..."
-          response_2=$(curl -s http://localhost:8001/v1/chat/completions \
-            -H "Content-Type: application/json" \
-            -d '{
-              "model": "${{ matrix.model_id }}",
-              "messages": [{"role": "user", "content": "${{ matrix.prompt }}"}],
-              "temperature": 0.7
-            }')
-          echo "Response 2: $response_2"
-
-          # Check processes after second response
-          check_processes
-
-          # Stop both instances
-          kill $PID1 $PID2
-
-          echo ""
-          # Extract content using jq and check if it contains expected output
-          content1=$(echo "$response_1" | jq -r '.choices[0].message.content')
-          content2=$(echo "$response_2" | jq -r '.choices[0].message.content')
-
-          if [[ "$content1" != *"${{ matrix.expected_output }}"* ]] || [[ "$content2" != *"${{ matrix.expected_output }}"* ]]; then
-            echo "Test failed: Response does not match '${{ matrix.expected_output }}'"
-            echo "Response 1 content: $content1"
-            echo ""
-            echo "Response 2 content: $content2"
-            echo "Output of first instance:"
-            cat output1.log
-            echo "Output of second instance:"
-            cat output2.log
-            exit 1
-          else
-            echo "Test passed: Response from both nodes matches '${{ matrix.expected_output }}'"
-          fi
-
-  measure_pip_sizes:
-    runs-on: macos-15
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
-      - name: Install dependencies and measure sizes
-        run: |
-          python -m venv env
-          source env/bin/activate
-          pip install --upgrade pip
-          pip install .
-          python ./extra/pipsize.py --json ./pipsize.json
-
-      - name: Upload pip sizes artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: pip-sizes
-          path: ./pipsize.json
+ echo "GITHUB_JOB: ${GITHUB_JOB}, GITHUB_RUN_ID: {GITHUB_RUN_ID}, GITHUB_RUN_NUMBER: {GITHUB_RUN_NUMBER}, GITHUB_WORKFLOW: ${GITHUB_WORKFLOW}"
|