@@ -20,6 +20,12 @@ commands:
           command: |
             source env/bin/activate
 
+            # Set CLANG=1 for tinygrad only
+            if [ "<<parameters.inference_engine>>" = "tinygrad" ]; then
+              pip install llvmlite
+              export TOKENIZERS_PARALLELISM=true SUPPORT_BF16=0 CLANG=1
+            fi
+
             # Start first instance
             HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout 900 2>&1 | tee output1.log &
             PID1=$!
@@ -48,13 +54,6 @@ commands:
             # Check processes before proceeding
             check_processes
 
-            # Special handling for dummy engine
-            if [ "<<parameters.inference_engine>>" = "dummy" ]; then
-              expected_content="This is a dummy response"
-            else
-              expected_content="Michael Jackson"
-            fi
-
             echo "Sending request to first instance..."
             response_1=$(curl -s http://localhost:8000/v1/chat/completions \
               -H "Content-Type: application/json" \
@@ -223,29 +222,29 @@ jobs:
       - checkout
       - run: system_profiler SPHardwareDataType
 
-  # chatgpt_api_integration_test_tinygrad:
-  #   macos:
-  #     xcode: "16.0.0"
-  #   resource_class: m2pro.large
-  #   steps:
-  #     - checkout
-  #     - run:
-  #         name: Set up Python
-  #         command: |
-  #           brew install python@3.12
-  #           python3.12 -m venv env
-  #           source env/bin/activate
-  #     - run:
-  #         name: Install dependencies
-  #         command: |
-  #           source env/bin/activate
-  #           pip install --upgrade pip
-  #           pip install .
-  #     - run_chatgpt_api_test:
-  #         inference_engine: tinygrad
-  #         model_id: llama-3-8b
-  #         prompt: "Keep responses concise. Who was the king of pop?"
-  #         expected_output: "Michael Jackson"
+  chatgpt_api_integration_test_tinygrad:
+    macos:
+      xcode: "16.0.0"
+    resource_class: m2pro.large
+    steps:
+      - checkout
+      - run:
+          name: Set up Python
+          command: |
+            brew install python@3.12
+            python3.12 -m venv env
+            source env/bin/activate
+      - run:
+          name: Install dependencies
+          command: |
+            source env/bin/activate
+            pip install --upgrade pip
+            pip install .
+      - run_chatgpt_api_test:
+          inference_engine: tinygrad
+          model_id: llama-3.2-1b
+          prompt: "Keep responses concise. Who was the king of pop?"
+          expected_output: "Michael Jackson"
 
 workflows:
   version: 2
@@ -254,6 +253,6 @@ workflows:
       - unit_test
       - discovery_integration_test
       - chatgpt_api_integration_test_mlx
+      - chatgpt_api_integration_test_tinygrad
       - chatgpt_api_integration_test_dummy
       - test_macos_m1
-      # - chatgpt_api_integration_test_tinygrad