
Merge pull request #114 from exo-explore/circleci-project-setup

Circleci project setup
Alex Cheema · 9 months ago
parent commit e3a524fd89
3 changed files with 182 additions and 202 deletions
  1. .circleci/config.yml  (+180 −0)
  2. .github/workflows/test.yml  (+0 −200)
  3. exo/api/chatgpt_api.py  (+2 −2)

+ 180 - 0
.circleci/config.yml

@@ -0,0 +1,180 @@
+version: 2.1
+
+orbs:
+  python: circleci/python@2
+
+jobs:
+  unit_test:
+    macos:
+      xcode: "15.2.0"
+    resource_class: macos.m1.medium.gen1
+    steps:
+      - checkout
+      - run:
+          name: Set up Python
+          command: |
+            brew install python@3.12
+            python3.12 -m venv env
+            source env/bin/activate
+      - run:
+          name: Install dependencies
+          command: |
+            source env/bin/activate
+            pip install --upgrade pip
+            pip install .
+      - run:
+          name: Run tests
+          command: |
+            source env/bin/activate
+            METAL_XCODE=1 python3 -m exo.inference.test_inference_engine
+
+  discovery_integration_test:
+    macos:
+      xcode: "15.2.0"
+    steps:
+      - checkout
+      - run:
+          name: Set up Python
+          command: |
+            brew install python@3.12
+            python3.12 -m venv env
+            source env/bin/activate
+      - run:
+          name: Install dependencies
+          command: |
+            source env/bin/activate
+            pip install --upgrade pip
+            pip install .
+      - run:
+          name: Run discovery integration test
+          command: |
+            source env/bin/activate
+            DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
+            PID1=$!
+            DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
+            PID2=$!
+            sleep 10
+            kill $PID1 $PID2
+            if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then
+              echo "Test passed: Both instances discovered each other"
+              exit 0
+            else
+              echo "Test failed: Devices did not discover each other"
+              echo "Output of first instance:"
+              cat output1.log
+              echo "Output of second instance:"
+              cat output2.log
+              exit 1
+            fi
+
+  chatgpt_api_integration_test:
+    macos:
+      xcode: "15.2.0"
+    steps:
+      - checkout
+      - run:
+          name: Set up Python
+          command: |
+            brew install python@3.12
+            python3.12 -m venv env
+            source env/bin/activate
+      - run:
+          name: Install dependencies
+          command: |
+            source env/bin/activate
+            pip install --upgrade pip
+            pip install .
+      - run:
+          name: Run chatgpt api integration test
+          command: |
+            source env/bin/activate
+
+            # Start first instance
+            DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
+            PID1=$!
+
+            # Start second instance
+            DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
+            PID2=$!
+
+            # Wait for discovery
+            sleep 10
+
+            # Function to check if processes are still running
+            check_processes() {
+              if ! kill -0 $PID1 2>/dev/null; then
+                echo "First instance (PID $PID1) died unexpectedly. Log output:"
+                cat output1.log
+                exit 1
+              fi
+              if ! kill -0 $PID2 2>/dev/null; then
+                echo "Second instance (PID $PID2) died unexpectedly. Log output:"
+                cat output2.log
+                exit 1
+              fi
+            }
+
+            # Check processes before proceeding
+            check_processes
+
+            # First request: placeholder prompt to trigger model load
+            curl -s http://localhost:8000/v1/chat/completions \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "model": "llama-3.1-8b",
+                  "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
+                  "temperature": 0.7
+                }'
+
+            # Check processes after model load
+            check_processes
+
+            response_1=$(curl -s http://localhost:8000/v1/chat/completions \
+              -H "Content-Type: application/json" \
+              -d '{
+                "model": "llama-3.1-8b",
+                "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
+                "temperature": 0.7
+              }')
+            echo "Response 1: $response_1"
+
+            # Check processes after first response
+            check_processes
+
+            response_2=$(curl -s http://localhost:8000/v1/chat/completions \
+              -H "Content-Type: application/json" \
+              -d '{
+                "model": "llama-3.1-8b",
+                "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
+                "temperature": 0.7
+              }')
+            echo "Response 2: $response_2"
+
+            # Check processes after second response
+            check_processes
+
+            # Stop both instances
+            kill $PID1 $PID2
+
+            echo ""
+            if ! echo "$response_1" | grep -q "Michael Jackson" || ! echo "$response_2" | grep -q "Michael Jackson"; then
+              echo "Test failed: Response does not contain 'Michael Jackson'"
+              echo "Response 1: $response_1"
+              echo ""
+              echo "Response 2: $response_2"
+              echo "Output of first instance:"
+              cat output1.log
+              echo "Output of second instance:"
+              cat output2.log
+              exit 1
+            else
+              echo "Test passed: Response from both nodes contains 'Michael Jackson'"
+            fi
+
+workflows:
+  version: 2
+  build_and_test:
+    jobs:
+      - unit_test
+      - discovery_integration_test
+      - chatgpt_api_integration_test
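
The new config can be sanity-checked before pushing. A minimal local check, assuming the CircleCI CLI is installed on the development machine (it is not part of this commit):

    circleci config validate .circleci/config.yml

This catches YAML and schema errors in the 2.1 config; the macOS jobs themselves still only run on CircleCI's hosted Apple Silicon resource class.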

+ 0 - 200
.github/workflows/test.yml

@@ -1,200 +0,0 @@
-name: Python Tests on M1 Mac
-
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
-
-jobs:
-  unit_test:
-    runs-on: macos-14
-    steps:
-    - uses: actions/checkout@v2
-
-    - name: Set up Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: '3.12'
-
-    - name: Cache huggingface hub models
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/huggingface/hub
-        key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }}
-
-    - name: Install dependencies
-      run: |
-        python3 -m pip install --upgrade pip
-        pip install .
-
-    - name: Run tests
-      run: |
-        # Check if cached files are present
-        ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
-
-        # Run unit tests
-        METAL_XCODE=1 python3 -m exo.inference.test_inference_engine
-
-  discovery_integration_test:
-    runs-on: macos-latest
-    steps:
-    - uses: actions/checkout@v2
-
-    - name: Set up Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: '3.x'
-
-    - name: Install dependencies
-      run: |
-        python3 -m pip install --upgrade pip
-        pip install .
-
-    - name: Run discovery integration test
-      run: |
-        # Start first instance
-        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
-        PID1=$!
-
-        # Start second instance
-        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
-        PID2=$!
-
-        # Wait for discovery
-        sleep 10
-
-        # Stop both instances
-        kill $PID1 $PID2
-
-        # Check outputs
-        if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then
-          echo "Test passed: Both instances discovered each other"
-          exit 0
-        else
-          echo "Test failed: Devices did not discover each other"
-          echo "Output of first instance:"
-          cat output1.log
-          echo "Output of second instance:"
-          cat output2.log
-          exit 1
-        fi
-
-  chatgpt_api_integration_test:
-    runs-on: macos-latest
-    steps:
-    - uses: actions/checkout@v2
-
-    - name: Set up Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: '3.x'
-
-    - name: Cache huggingface hub models
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/huggingface/hub
-        key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }}
-        restore-keys: |
-          ${{ runner.os }}-huggingface-hub-
-
-    - name: Cache tinygrad downloaded models
-      uses: actions/cache@v3
-      with:
-        path: ~/Library/Caches/tinygrad/downloads
-        key: ${{ runner.os }}-tinygrad-downloads-${{ hashFiles('~/Library/Caches/tinygrad/downloads/**/*') }}-${{ github.job }}
-        restore-keys: |
-          ${{ runner.os }}-tinygrad-downloads-
-
-    - name: Install dependencies
-      run: |
-        python3 -m pip install --upgrade pip
-        pip install .
-
-    - name: Run chatgpt api integration test
-      run: |
-        exit 0 # TODO
-        # Check if cached files are present
-        ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
-
-        # Start first instance
-        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
-        PID1=$!
-
-        # Start second instance
-        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
-        PID2=$!
-
-        # Wait for discovery
-        sleep 10
-
-        # Function to check if processes are still running
-        check_processes() {
-          if ! kill -0 $PID1 2>/dev/null; then
-            echo "First instance (PID $PID1) died unexpectedly. Log output:"
-            cat output1.log
-            exit 1
-          fi
-          if ! kill -0 $PID2 2>/dev/null; then
-            echo "Second instance (PID $PID2) died unexpectedly. Log output:"
-            cat output2.log
-            exit 1
-          fi
-        }
-
-        # Check processes before proceeding
-        check_processes
-
-        # first one to load the model
-        curl -s http://localhost:8000/v1/chat/completions \
-            -H "Content-Type: application/json" \
-            -d '{
-              "model": "llama-3-8b",
-              "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
-              "temperature": 0.7
-            }'
-
-        # Check processes after model load
-        check_processes
-
-        response_1=$(curl -s http://localhost:8000/v1/chat/completions \
-          -H "Content-Type: application/json" \
-          -d '{
-            "model": "llama-3-8b",
-            "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
-            "temperature": 0.7
-          }')
-        echo "Response 1: $response_1"
-
-        # Check processes after first response
-        check_processes
-
-        response_2=$(curl -s http://localhost:8000/v1/chat/completions \
-          -H "Content-Type: application/json" \
-          -d '{
-            "model": "llama-3-8b",
-            "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
-            "temperature": 0.7
-          }')
-        echo "Response 2: $response_2"
-
-        # Check processes after second response
-        check_processes
-
-        # Stop both instances
-        kill $PID1 $PID2
-
-        echo ""
-        if ! echo "$response_1" | grep -q "Michael Jackson" || ! echo "$response_2" | grep -q "Michael Jackson"; then
-          echo "Test failed: Response does not contain 'Michael Jackson'"
-          echo "Response 1: $response_1"
-          echo ""
-          echo "Response 2: $response_2"
-          echo "Output of first instance:"
-          cat output1.log
-          echo "Output of second instance:"
-          cat output2.log
-          exit 1
-        else
-          echo "Test passed: Response from both nodes contains 'Michael Jackson'"
-        fi
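
One behavioral difference worth noting: the deleted GitHub workflow cached the Hugging Face hub directory between runs, while the new CircleCI config re-downloads models every time. If equivalent caching is wanted, CircleCI's restore_cache/save_cache steps are the usual mechanism; a hedged sketch (the cache key name is illustrative, not from this commit):

      - restore_cache:
          keys:
            - huggingface-hub-v1
      # ... run the test steps ...
      - save_cache:
          key: huggingface-hub-v1
          paths:
            - ~/.cache/huggingface/hub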

+ 2 - 2
exo/api/chatgpt_api.py

@@ -307,9 +307,9 @@ class ChatGPTAPI:
     prompt, image_str = build_prompt(tokenizer, chat_request.messages)
     request_id = None
     match = self.prompts.find_longest_prefix(prompt)
-    if match:
+    if match and len(prompt) > len(match[1].prompt):
         if DEBUG >= 2:
-            print(f"Prompt for request starts with previous prompt {len(match[1].prompt)} of {len(prompt)}: {match[1].prompt}")
+          print(f"Prompt for request starts with previous prompt {len(match[1].prompt)} of {len(prompt)}: {match[1].prompt}")
         request_id = match[1].request_id
         self.prompts.add(prompt, PromptSession(request_id=request_id, timestamp=int(time.time()), prompt=prompt))
         # remove the matching prefix from the prompt
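
The added guard, len(prompt) > len(match[1].prompt), makes session reuse apply only when the previously cached prompt is a strict prefix of the new one. Without it, a repeated identical prompt would match itself, and removing the "prefix" would leave an empty prompt to run inference on. A minimal sketch of the intended behavior, with simplified stand-ins for exo's PromptSession and prefix store (all names here are illustrative):

    from dataclasses import dataclass

    @dataclass
    class PromptSession:  # simplified stand-in, not the real exo class
        request_id: str
        prompt: str

    def resume_if_strict_prefix(prompt: str, cached: PromptSession | None):
        # Old check was just `if cached:`; the stricter guard treats a prompt
        # identical to the cached one as a fresh request instead of reducing
        # it to an empty remainder.
        if cached and len(prompt) > len(cached.prompt) and prompt.startswith(cached.prompt):
            return cached.request_id, prompt[len(cached.prompt):]  # only the new suffix runs
        return None, prompt

    session = PromptSession(request_id="req-1", prompt="Hello")
    assert resume_if_strict_prefix("Hello world", session) == ("req-1", " world")
    assert resume_if_strict_prefix("Hello", session) == (None, "Hello")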