7 months ago · 97a8d2e573
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -84,18 +84,22 @@ commands:
 
				             kill $PID1 $PID2
			
 
				 
			
 
				             echo ""
			
 
				-            if ! echo "$response_1" | grep -q "<<parameters.expected_output>>" || ! echo "$response_2" | grep -q "<<parameters.expected_output>>"; then
			
 
				-              echo "Test failed: Response does not contain '<<parameters.expected_output>>'"
			
 
				-              echo "Response 1: $response_1"
			
 
				+            # Extract content using jq and check if it contains expected output
			
 
				+            content1=$(echo "$response_1" | jq -r '.choices[0].message.content')
			
 
				+            content2=$(echo "$response_2" | jq -r '.choices[0].message.content')
			
 
				+
			
 
				+            if [[ "$content1" != *"<<parameters.expected_output>>"* ]] || [[ "$content2" != *"<<parameters.expected_output>>"* ]]; then
			
 
				+              echo "Test failed: Response does not match '<<parameters.expected_output>>'"
			
 
				+              echo "Response 1 content: $content1"
			
 
				               echo ""
			
 
				-              echo "Response 2: $response_2"
			
 
				+              echo "Response 2 content: $content2"
			
 
				               echo "Output of first instance:"
			
 
				               cat output1.log
			
 
				               echo "Output of second instance:"
			
 
				               cat output2.log
			
 
				               exit 1
			
 
				             else
			
 
				-              echo "Test passed: Response from both nodes contains '<<parameters.expected_output>>'"
			
 
				+              echo "Test passed: Response from both nodes matches '<<parameters.expected_output>>'"
			
 
				             fi
			
 
				 
			
 
				 jobs:
			
@@ -211,18 +215,10 @@ jobs:
 
				             pip install .
			
 
				       - run_chatgpt_api_test:
			
 
				           inference_engine: dummy
			
 
				-          model_id: dummy-model
			
 
				+          model_id: dummy
			
 
				           prompt: "Dummy prompt."
			
 
				           expected_output: "dummy"
			
 
				 
			
 
				-  test_macos_m1:
			
 
				-    macos:
			
 
				-      xcode: "16.0.0"
			
 
				-    resource_class: m2pro.large
			
 
				-    steps:
			
 
				-      - checkout
			
 
				-      - run: system_profiler SPHardwareDataType
			
 
				-
			
 
				   chatgpt_api_integration_test_tinygrad:
			
 
				     macos:
			
 
				       xcode: "16.0.0"
			
@@ -336,5 +332,4 @@ workflows:
 
				       - chatgpt_api_integration_test_mlx
			
 
				       - chatgpt_api_integration_test_tinygrad
			
 
				       - chatgpt_api_integration_test_dummy
			
 
				-      - test_macos_m1
			
 
				       - measure_pip_sizes
			
--- a/exo/inference/dummy_inference_engine.py
+++ b/exo/inference/dummy_inference_engine.py
@@ -3,9 +3,10 @@ import numpy as np
 
				 import random
			
 
				 import string
			
 
				 import asyncio
			
 
				-import json
			
 
				 from exo.inference.inference_engine import InferenceEngine
			
 
				 from exo.inference.shard import Shard
			
 
				+from exo.inference.tokenizers import DummyTokenizer
			
 
				+
			
 
				 def random_string(length: int):
			
 
				   return ''.join([random.choice(string.ascii_lowercase) for i in range(length)])
			
 
				   
			
@@ -18,15 +19,18 @@ class DummyInferenceEngine(InferenceEngine):
 
				     self.eos_token_id = 0
			
 
				     self.latency_mean = 0.1
			
 
				     self.latency_stddev = 0.02
			
 
				+    self.tokenizer = DummyTokenizer()
			
 
				 
			
 
				   async def encode(self, shard: Shard, prompt: str) -> np.ndarray:
			
 
				-    return np.random.randint(1, self.vocab_size, size=(1, len(prompt.split())))
			
 
				+    return np.array(self.tokenizer.encode(prompt))
			
 
				   
			
 
				   async def sample(self, x: np.ndarray) -> np.ndarray:
			
 
				-    return np.random.randint(1, self.vocab_size)
			
 
				+    if random.random() < 0.1:
			
 
				+      return np.array([self.tokenizer.eos_token_id])
			
 
				+    return np.array([np.random.randint(1, self.vocab_size)])
			
 
				 
			
 
				   async def decode(self, shard: Shard, tokens: np.ndarray) -> str:
			
 
				-    return ' '.join([random_string(np.random.randint(1, 34)) for token in tokens])
			
 
				+    return self.tokenizer.decode(tokens)
			
 
				 
			
 
				   async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray) -> np.ndarray:
			
 
				     await self.ensure_shard(shard)
			
--- a/exo/inference/tokenizers.py
+++ b/exo/inference/tokenizers.py
@@ -4,19 +4,24 @@ from os import PathLike
 
				 from pathlib import Path
			
 
				 from typing import Union
			
 
				 from transformers import AutoTokenizer, AutoProcessor
			
 
				+import numpy as np
			
 
				 from exo.download.hf.hf_helpers import get_local_snapshot_dir
			
 
				 from exo.helpers import DEBUG
			
 
				 
			
 
				 
			
 
				 class DummyTokenizer:
			
 
				   def __init__(self):
			
 
				-    self.eos_token_id = 0
			
 
				+    self.eos_token_id = 69
			
 
				+    self.vocab_size = 1000
			
 
				 
			
 
				   def apply_chat_template(self, messages, tokenize=True, add_generation_prompt=True):
			
 
				-    return [1, 2, 3]
			
 
				+    return "dummy_tokenized_prompt"
			
 
				+
			
 
				+  def encode(self, text):
			
 
				+    return np.random.randint(1, self.vocab_size, size=(1, len(text.split())))
			
 
				 
			
 
				   def decode(self, tokens):
			
 
				-    return "dummy"
			
 
				+    return "dummy" * len(tokens)
			
 
				 
			
 
				 
			
 
				 async def resolve_tokenizer(model_id: str):
			
--- a/exo/orchestration/standard_node.py
+++ b/exo/orchestration/standard_node.py
@@ -360,6 +360,7 @@ class StandardNode(Node):
 
				     return len(peers_added) > 0 or len(peers_removed) > 0 or len(peers_updated) > 0
			
 
				 
			
 
				   async def select_best_inference_engine(self):
			
 
				+    if self.inference_engine.__class__.__name__ == 'DummyInferenceEngine': return
			
 
				     supported_engines = self.get_supported_inference_engines()
			
 
				     await self.broadcast_supported_engines(supported_engines)
			
 
				     if len(self.get_topology_inference_engines()):