# .circleci/config.yml
  1. version: 2.1
  2. orbs:
  3. python: circleci/python@2
  4. jobs:
  # Runs the inference-engine unit tests on an Apple Silicon macOS executor
  # (MLX requires Metal, hence the macos.m1 resource class).
  unit_test:
    macos:
      xcode: "15.2.0"
    resource_class: macos.m1.medium.gen1
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - restore_cache:
          keys:
            # NOTE(review): CircleCI `checksum` expects a single file path; a
            # glob like this may not resolve as intended — confirm on a build.
            - huggingface-hub-{{ checksum "~/.cache/huggingface/hub/**/*" }}-{{ .Environment.CIRCLE_JOB }}
            # Fall back to the most recent cache on an exact-key miss (keeps
            # this job consistent with chatgpt_api_integration_test).
            - huggingface-hub-
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run:
          name: Run tests
          command: |
            source env/bin/activate
            # Show whether the model cache was restored; non-fatal if absent.
            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
            METAL_XCODE=1 python3 -m exo.inference.test_inference_engine
      - save_cache:
          paths:
            - ~/.cache/huggingface/hub
          key: huggingface-hub-{{ checksum "~/.cache/huggingface/hub/**/*" }}-{{ .Environment.CIRCLE_JOB }}
  # Starts two exo nodes with mirrored listen/broadcast ports and asserts
  # that each one logs "Connected to peer" within the discovery window.
  discovery_integration_test:
    macos:
      xcode: "15.2.0"
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run:
          name: Run discovery integration test
          command: |
            source env/bin/activate
            # node1 broadcasts on node2's listen port and vice versa, so the
            # two instances can discover each other on localhost.
            DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
            PID1=$!
            DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
            PID2=$!
            # Give discovery time to complete before stopping both nodes.
            sleep 10
            kill $PID1 $PID2
            if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then
              echo "Test passed: Both instances discovered each other"
              exit 0
            else
              echo "Test failed: Devices did not discover each other"
              echo "Output of first instance:"
              cat output1.log
              echo "Output of second instance:"
              cat output2.log
              exit 1
            fi
  # End-to-end test of the ChatGPT-compatible API across a two-node cluster:
  # boots two MLX nodes, warms the model, then asserts both responses mention
  # "Michael Jackson". Model weights and tinygrad downloads are cached.
  chatgpt_api_integration_test:
    macos:
      xcode: "15.2.0"
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - restore_cache:
          keys:
            # NOTE(review): CircleCI `checksum` expects a single file path; a
            # glob like this may not resolve as intended — confirm on a build.
            - huggingface-hub-{{ checksum "~/.cache/huggingface/hub/**/*" }}-{{ .Environment.CIRCLE_JOB }}
            - huggingface-hub-
      - restore_cache:
          keys:
            - tinygrad-downloads-{{ checksum "~/Library/Caches/tinygrad/downloads/**/*" }}-{{ .Environment.CIRCLE_JOB }}
            - tinygrad-downloads-
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run:
          name: Run chatgpt api integration test
          command: |
            source env/bin/activate
            # Check if cached files are present
            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
            # Start first instance
            DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
            PID1=$!
            # Start second instance
            DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
            PID2=$!
            # Wait for discovery
            sleep 10
            # Function to check if processes are still running; dumps the
            # dead node's log and fails the job if either has exited.
            check_processes() {
              if ! kill -0 $PID1 2>/dev/null; then
                echo "First instance (PID $PID1) died unexpectedly. Log output:"
                cat output1.log
                exit 1
              fi
              if ! kill -0 $PID2 2>/dev/null; then
                echo "Second instance (PID $PID2) died unexpectedly. Log output:"
                cat output2.log
                exit 1
              fi
            }
            # Check processes before proceeding
            check_processes
            # first one to load the model
            curl -s http://localhost:8000/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
                "model": "llama-3-8b",
                "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
                "temperature": 0.7
              }'
            # Check processes after model load
            check_processes
            response_1=$(curl -s http://localhost:8000/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
                "model": "llama-3-8b",
                "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
                "temperature": 0.7
              }')
            echo "Response 1: $response_1"
            # Check processes after first response
            check_processes
            response_2=$(curl -s http://localhost:8000/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
                "model": "llama-3-8b",
                "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
                "temperature": 0.7
              }')
            echo "Response 2: $response_2"
            # Check processes after second response
            check_processes
            # Stop both instances
            kill $PID1 $PID2
            echo ""
            if ! echo "$response_1" | grep -q "Michael Jackson" || ! echo "$response_2" | grep -q "Michael Jackson"; then
              echo "Test failed: Response does not contain 'Michael Jackson'"
              echo "Response 1: $response_1"
              echo ""
              echo "Response 2: $response_2"
              echo "Output of first instance:"
              cat output1.log
              echo "Output of second instance:"
              cat output2.log
              exit 1
            else
              echo "Test passed: Response from both nodes contains 'Michael Jackson'"
            fi
      # Caches are only written when the test step succeeds.
      - save_cache:
          paths:
            - ~/.cache/huggingface/hub
          key: huggingface-hub-{{ checksum "~/.cache/huggingface/hub/**/*" }}-{{ .Environment.CIRCLE_JOB }}
      - save_cache:
          paths:
            - ~/Library/Caches/tinygrad/downloads
          key: tinygrad-downloads-{{ checksum "~/Library/Caches/tinygrad/downloads/**/*" }}-{{ .Environment.CIRCLE_JOB }}
  182. workflows:
  183. version: 2
  184. build_and_test:
  185. jobs:
  186. - unit_test
  187. - discovery_integration_test
  188. - chatgpt_api_integration_test