use llama 3.1 in tests

Alex Cheema · 1 year ago
commit 67ad3f57a1
1 changed file with 5 additions and 5 deletions

.circleci/config.yml (+5 -5)

@@ -32,7 +32,7 @@ jobs:
           name: Run tests
           command: |
             source env/bin/activate
-            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
+            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3.1-8B-Instruct-4bit/**/* || true
             METAL_XCODE=1 python3 -m exo.inference.test_inference_engine
       - save_cache:
           paths:
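
Aside (not part of the diff): the ls glob above relies on the Hugging Face hub cache layout, where a repo id like mlx-community/Meta-Llama-3.1-8B-Instruct-4bit is stored under a models--<org>--<name> directory. A minimal shell sketch of that mapping; the repo_cache_dir helper is hypothetical, not something in this config:

    # Hypothetical helper illustrating the Hugging Face hub cache layout:
    # a repo id "org/name" lives at ~/.cache/huggingface/hub/models--org--name.
    repo_cache_dir() {
      echo "$HOME/.cache/huggingface/hub/models--$(echo "$1" | sed 's|/|--|g')"
    }

    repo_cache_dir "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
    # prints $HOME/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3.1-8B-Instruct-4bit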
@@ -112,7 +112,7 @@ jobs:
           command: |
             source env/bin/activate
             # Check if cached files are present
-            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
+            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3.1-8B-Instruct-4bit/**/* || true
 
             # Start first instance
             DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
@@ -146,7 +146,7 @@ jobs:
             curl -s http://localhost:8000/v1/chat/completions \
                 -H "Content-Type: application/json" \
                 -d '{
-                  "model": "llama-3-8b",
+                  "model": "llama-3.1-8b",
                   "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
                   "temperature": 0.7
                 }'
@@ -157,7 +157,7 @@ jobs:
             response_1=$(curl -s http://localhost:8000/v1/chat/completions \
               -H "Content-Type: application/json" \
               -d '{
-                "model": "llama-3-8b",
+                "model": "llama-3.1-8b",
                 "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
                 "temperature": 0.7
               }')
@@ -169,7 +169,7 @@ jobs:
             response_2=$(curl -s http://localhost:8000/v1/chat/completions \
               -H "Content-Type: application/json" \
               -d '{
-                "model": "llama-3-8b",
+                "model": "llama-3.1-8b",
                 "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
                 "temperature": 0.7
               }')
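
Aside (not part of the diff): response_1 and response_2 are captured above, but the check that consumes them falls outside these hunks. A minimal sketch of how such responses might be validated, assuming jq is available on the CI image; the exact assertion used by the real workflow is not shown here:

    # Hypothetical validation step; the actual CI check is outside this diff.
    # Pull the assistant message out of each OpenAI-style chat completion.
    content_1=$(echo "$response_1" | jq -r '.choices[0].message.content')
    content_2=$(echo "$response_2" | jq -r '.choices[0].message.content')

    # Fail the job if either node returned an empty or malformed completion.
    if [ -z "$content_1" ] || [ "$content_1" = "null" ] || \
       [ -z "$content_2" ] || [ "$content_2" = "null" ]; then
      echo "llama-3.1-8b returned an empty response" >&2
      exit 1
    fi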