Force the MLX inference engine in the GitHub workflow, where it otherwise defaults to tinygrad because the runner reports 'model': 'Apple Virtual Machine 1', 'chip': 'Apple M1 (Virtual)'.
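
For context, exo picks an inference engine from the detected device capabilities when none is specified on the command line. Below is a minimal sketch of that kind of heuristic; the function name and the exact chip-string matching are illustrative assumptions, not exo's actual code:

```python
# Hypothetical sketch of chip-based engine selection; pick_default_engine
# and the matching rules are assumptions for illustration, not exo's code.
def pick_default_engine(chip: str) -> str:
    # Bare-metal Apple Silicon reports e.g. "Apple M1", which selects MLX.
    # The GitHub macOS runner reports "Apple M1 (Virtual)", which a strict
    # match does not recognize, so selection falls back to tinygrad.
    if chip.startswith("Apple M") and "(Virtual)" not in chip:
        return "mlx"
    return "tinygrad"

assert pick_default_engine("Apple M1") == "mlx"
assert pick_default_engine("Apple M1 (Virtual)") == "tinygrad"
```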

Alex Cheema committed 1 year ago
commit 628d8679b0

1 changed file with 2 additions and 2 deletions:
  .github/workflows/test.yml (+2 −2)

+2 −2   .github/workflows/test.yml

@@ -117,11 +117,11 @@ jobs:
         ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
 
         # Start first instance
-        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
+        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
         PID1=$!
 
         # Start second instance
-        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
+        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
         PID2=$!
 
         # Wait for discovery
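
Passing `--inference-engine mlx` explicitly, as the diff above now does for both instances, sidesteps that auto-detection. A sketch of how such a flag can override a chip-based default, again with illustrative argparse wiring rather than exo's actual CLI code:

```python
import argparse

# Illustrative wiring only; exo's real CLI may differ.
parser = argparse.ArgumentParser()
parser.add_argument("--inference-engine", choices=["mlx", "tinygrad"], default=None)
args = parser.parse_args(["--inference-engine", "mlx"])

# Assumed fallback heuristic: virtual chips default to tinygrad.
detected_chip = "Apple M1 (Virtual)"
default_engine = "tinygrad" if "(Virtual)" in detected_chip else "mlx"

# The explicit flag, when given, wins over auto-detection.
engine = args.inference_engine or default_engine
print(engine)  # -> mlx
```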