@@ -32,7 +32,7 @@ jobs:
           name: Run tests
           command: |
             source env/bin/activate
-            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
+            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3.1-8B-Instruct-4bit/**/* || true
             METAL_XCODE=1 python3 -m exo.inference.test_inference_engine
       - save_cache:
           paths:
@@ -112,7 +112,7 @@ jobs:
           command: |
             source env/bin/activate
             # Check if cached files are present
-            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
+            ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3.1-8B-Instruct-4bit/**/* || true

             # Start first instance
             DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
@@ -146,7 +146,7 @@ jobs:
             curl -s http://localhost:8000/v1/chat/completions \
               -H "Content-Type: application/json" \
               -d '{
-                "model": "llama-3-8b",
+                "model": "llama-3.1-8b",
                 "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
                 "temperature": 0.7
               }'
@@ -157,7 +157,7 @@ jobs:
             response_1=$(curl -s http://localhost:8000/v1/chat/completions \
               -H "Content-Type: application/json" \
               -d '{
-                "model": "llama-3-8b",
+                "model": "llama-3.1-8b",
                 "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
                 "temperature": 0.7
               }')
@@ -169,7 +169,7 @@ jobs:
             response_2=$(curl -s http://localhost:8000/v1/chat/completions \
               -H "Content-Type: application/json" \
               -d '{
-                "model": "llama-3-8b",
+                "model": "llama-3.1-8b",
                 "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
                 "temperature": 0.7
               }')
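
The same checks the workflow runs can be reproduced by hand when verifying the rename locally. A minimal sketch, assuming an exo node is already running with --chatgpt-api-port 8000 as in the config above (the cache path, endpoint, and model id are taken directly from the diff):

# Confirm the Llama 3.1 weights are present in the Hugging Face cache
ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3.1-8B-Instruct-4bit/**/* || true

# Send a request using the renamed model id to the local ChatGPT-compatible API
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama-3.1-8b",
    "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
    "temperature": 0.7
  }'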