Glen 7 months ago
parent
commit
732ba915aa
3 changed files with 49 additions and 28 deletions
  1. 2 4
      .github/bench.py
  2. 31 24
      .github/workflows/bench_job.yml
  3. 16 0
      .github/workflows/benchmarks.yml

+ 2 - 4
.github/bench.py

@@ -21,9 +21,7 @@ async def measure_performance(api_endpoint: str, prompt: str) -> Dict[str, Any]:
     """
     model = os.environ.get('model')
     results: Dict[str, Any] = {'model': model, 'run_id': os.environ.get('GITHUB_RUN_ID')}
-    results['configuration'] = {
-        'M4': 2 # TODO get this through env vars from the matrix def
-    }
+    results['configuration'] = json.loads(os.environ.get('HARDWARE_CONFIG'))
 
     # Get prompt length in tokens
     async with aiohttp.ClientSession() as session:
@@ -37,7 +35,7 @@ async def measure_performance(api_endpoint: str, prompt: str) -> Dict[str, Any]:
                 json=request_payload
             ) as response:
                 token_data = await response.json()
-                prompt_tokens = token_data.get('length', 0)
+                prompt_tokens = token_data.get('num_tokens', 0)
                 print(f"Prompt length: {prompt_tokens} tokens", flush=True)
         except Exception as e:
             print(f"Failed to get prompt length: {e}", flush=True)

+ 31 - 24
.github/workflows/build_and_test.yml → .github/workflows/bench_job.yml

@@ -1,26 +1,38 @@
-name: Build and Test
+# This is the reusable workflow file
+name: Distributed Job Runner
 
 on:
-  push:
-    branches: [ '*' ]
-    tags: [ '*' ]
-  pull_request:
-    branches: [ '*' ]
-
-env:
-  PYTHON_VERSION: "3.12"
-  TOKENIZERS_PARALLELISM: "false"
-  PYTHONPATH: "."
-
+  workflow_call:
+    inputs:
+      config:
+        required: true
+        type: string
+      model:
+        required: true
+        type: string
 jobs:
-  check_local_runner:
+  generate-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - id: set-matrix
+        env:
+          CONFIG: ${{ inputs.config }}
+        run: |
+          MATRIX=$(echo $CONFIG | jq -c '{cpu: [to_entries | .[] | .key as $k | range(.value) | $k]}')
+          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
+
+  run-distributed-job:
+    needs: generate-matrix
     strategy:
-      matrix:
-        cpu: ['M4', 'M4']
+      matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}
     runs-on: ['self-hosted', 'macOS', '${{ matrix.cpu }}']
+    env:
+      HARDWARE_CONFIG: ${{ inputs.config }}
+      model: ${{ inputs.model }}
     steps:
       - uses: actions/checkout@v4
-
       - name: Install dependencies
         run: |
           # First, find where python3.12 is installed
@@ -37,18 +49,16 @@ jobs:
           pip install --upgrade pip
           pip install .
           pip install boto3==1.35.76
-
       - name: Run exo
         env:
           aws_access_key_id: ${{ secrets.S3_EXO_BENCHMARKS_AWS_ACCESS_KEY_ID }}
           aws_secret_key: ${{ secrets.S3_EXO_BENCHMARKS_AWS_SECRET_ACCESS_KEY }}
-          model: llama-3.2-1b
         run: |
-          ALL_NODE_IDS=$(for i in $(seq ${{ strategy.job-total }} -1 0); do echo -n "${GITHUB_JOB}_${i},"; done | sed 's/,$//')
-          MY_NODE_ID="${GITHUB_JOB}_${{ strategy.job-index }}"
+          UNIQUE_JOB_ID="${GITHUB_JOB}_${GITHUB_RUN_ID}"
+          ALL_NODE_IDS=$(for i in $(seq ${{ strategy.job-total }} -1 0); do echo -n "${UNIQUE_JOB_ID}_${i},"; done | sed 's/,$//')
+          MY_NODE_ID="${UNIQUE_JOB_ID}_${{ strategy.job-index }}"
           source env/bin/activate
           export PATH="/usr/local/bin:/opt/homebrew/bin:$PATH"
-          echo "${ALL_NODE_IDS}"
           exo --node-id="${MY_NODE_ID}" --node-id-filter="${ALL_NODE_IDS}" --chatgpt-api-port 52415 --disable-tui > output1.log 2>&1 &
           PID1=$!
           tail -f output1.log &
@@ -83,6 +93,3 @@ jobs:
             sleep 5
           done
           fi
-      - name: Test
-        run: |
-          echo "GITHUB_JOB: ${GITHUB_JOB}, GITHUB_RUN_ID: ${GITHUB_RUN_ID}, GITHUB_RUN_NUMBER: ${GITHUB_RUN_NUMBER}, GITHUB_WORKFLOW: ${GITHUB_WORKFLOW}"

+ 16 - 0
.github/workflows/benchmarks.yml

@@ -0,0 +1,16 @@
+name: Build and Test
+
+on:
+  push:
+    branches: [ '*' ]
+    tags: [ '*' ]
+  pull_request:
+    branches: [ '*' ]
+
+jobs:
+  test-m4-cluster:
+    uses: ./.github/workflows/bench_job.yml
+    with:
+      config: '{"M4PRO_GPU16_24GB": 2}'
+      model: 'llama-3.2-1b'
+    secrets: inherit