6 months ago · 023ddc207e
--- a/.github/workflows/bench_job.yml
+++ b/.github/workflows/bench_job.yml
@@ -13,6 +13,9 @@ on:
 
				       calling_job_name:
			
 
				         required: true
			
 
				         type: string
			
 
				+      network_interface:  # interface type forwarded to exo's --interface-type-filter flag
			
 
				+        required: true
			
 
				+        type: string
			
 
				 jobs:
			
 
				   generate-matrix:
			
 
				     runs-on: ubuntu-latest
			
@@ -122,7 +125,7 @@ jobs:
 
				           sudo taskpolicy -d default -g default -a -t 0 -l 0 .venv/bin/exo \
			
 
				             --node-id="${MY_NODE_ID}" \
			
 
				             --node-id-filter="${ALL_NODE_IDS}" \
			
 
				-            --interface-type-filter="Ethernet" \
			
 
				+            --interface-type-filter="${{ inputs.network_interface }}" \
			
 
				             --disable-tui \
			
 
				             --max-generate-tokens 250 \
			
 
				             --chatgpt-api-port 52415 > output1.log 2>&1 &
			
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -17,6 +17,7 @@ jobs:
 
				       config: '{"M4PRO_GPU16_24GB": 1}'
			
 
				       model: ${{ matrix.model }}
			
 
				       calling_job_name: 'single-m4-pro'
			
 
				+      network_interface: 'Ethernet'
			
 
				     secrets: inherit
			
 
				 
			
 
				   two-m4-pro-cluster:
			
@@ -28,30 +29,43 @@ jobs:
 
				       config: '{"M4PRO_GPU16_24GB": 2}'
			
 
				       model: ${{ matrix.model }}
			
 
				       calling_job_name: 'two-m4-pro-cluster'
			
 
				+      network_interface: 'Ethernet'
			
 
				     secrets: inherit
			
 
				 
			
 
				+  # two-m4-pro-cluster-thunderbolt:
			
 
				+  #   strategy:
			
 
				+  #     matrix:
			
 
				+  #       model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b']
			
 
				+  #   uses: ./.github/workflows/bench_job.yml
			
 
				+  #   with:
			
 
				+  #     config: '{"M4PRO_GPU16_24GB": 2}'
			
 
				+  #     model: ${{ matrix.model }}
			
 
				+  #     calling_job_name: 'two-m4-pro-cluster-thunderbolt'
			
 
				+  #     network_interface: 'Thunderbolt'
			
 
				+  #   secrets: inherit
			
 
				+
			
 
				   three-m4-pro-cluster:
			
 
				     strategy:
			
 
				       matrix:
			
 
				         model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b', 'llama-3.3-70b']
			
 
				-      # Optional: add fail-fast: false if you want all matrix jobs to continue even if one fails
			
 
				       fail-fast: false
			
 
				     uses: ./.github/workflows/bench_job.yml
			
 
				     with:
			
 
				       config: '{"M4PRO_GPU16_24GB": 3}'
			
 
				       model: ${{ matrix.model }}
			
 
				       calling_job_name: 'three-m4-pro-cluster'
			
 
				+      network_interface: 'Ethernet'  # same interface type bench_job.yml hard-coded before this input existed
			
 
				     secrets: inherit
			
 
				 
			
 
				-  # test-m3-single-node:
			
 
				-  #   strategy:
			
 
				-  #     matrix:
			
 
				-  #       model: ['llama-3.2-1b']
			
 
				-  #     # Optional: add fail-fast: false if you want all matrix jobs to continue even if one fails
			
 
				-  #     fail-fast: false
			
 
				-  #   uses: ./.github/workflows/bench_job.yml
			
 
				-  #   with:
			
 
				-  #     config: '{"M3MAX_GPU40_128GB": 1}'
			
 
				-  #     model: ${{ matrix.model }}
			
 
				-  #     calling_job_name: 'test-m3-cluster'
			
 
				-  #   secrets: inherit
			
 
				+  test-m3-single-node:  # runs the shared bench_job.yml workflow against the M3 Max config below
			
 
				+    strategy:
			
 
				+      matrix:
			
 
				+        model: ['llama-3.2-1b']
			
 
				+      fail-fast: false
			
 
				+    uses: ./.github/workflows/bench_job.yml
			
 
				+    with:
			
 
				+      config: '{"M3MAX_GPU40_128GB": 1}'
			
 
				+      model: ${{ matrix.model }}
			
 
				+      calling_job_name: 'test-m3-single-node'  # matches this job's id, like the other callers in this file
			
 
				+      network_interface: 'Ethernet'
			
 
				+    secrets: inherit